1/****************************************************************************
2 * Copyright (C) 2012-2015 Woboq GmbH
3 * Olivier Goffart <contact at woboq.com>
4 * https://woboq.com/codebrowser.html
5 *
6 * This file is part of the Woboq Code Browser.
7 *
8 * Commercial License Usage:
9 * Licensees holding valid commercial licenses provided by Woboq may use
10 * this file in accordance with the terms contained in a written agreement
11 * between the licensee and Woboq.
12 * For further information see https://woboq.com/codebrowser.html
13 *
14 * Alternatively, this work may be used under a Creative Commons
15 * Attribution-NonCommercial-ShareAlike 3.0 (CC-BY-NC-SA 3.0) License.
16 * http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en_US
17 * This license does not allow you to use the code browser to assist the
18 * development of your commercial software. If you intent to do so, consider
19 * purchasing a commercial licence.
20 ****************************************************************************/
21
22
23#pragma once
24
25#include <utility>
26#include <vector>
27#include <string>
28
29
/**
 * Describes one file whose contents are compiled into the binary.
 *
 * The type has only constexpr constructors and raw-pointer/integer members,
 * so tables of entries can be declared `constexpr`. It does not own
 * `filename` or `content`: both must point at storage that outlives the
 * entry (string literals, in the table defined in this file).
 */
struct EmbeddedFile {
    /// Name recorded for the file; nullptr for a default-constructed entry.
    const char *filename;
    /// First byte of the file contents; nullptr for a default-constructed entry.
    const char *content;
    /// Number of bytes in `content`, not counting the array's last element.
    size_t size;

    /**
     * Builds an entry from a character array, typically a string literal.
     *
     * The length comes from the array bound rather than from strlen(), so NUL
     * bytes embedded in the data are counted. The bound is deduced as size_t
     * (the type of an array extent and of `size`), so `N - 1` is computed
     * without a signed-to-unsigned conversion and the accepted bound is not
     * capped at INT_MAX.
     *
     * @param filename  name to record for the file (stored, not copied)
     * @param data      character array; its last element is assumed to be the
     *                  terminating NUL and is excluded from `size`
     */
    template <size_t N>
    constexpr EmbeddedFile(const char *filename, const char (&data)[N])
        : filename(filename), content(data), size(N - 1) {}

    /// Builds an empty entry: both pointers null, size zero.
    constexpr EmbeddedFile() : filename(nullptr), content(nullptr), size(0) {}
};
39
40static constexpr EmbeddedFile EmbeddedFiles[] = {
41 { "/builtins/__clang_cuda_builtin_vars.h" , "/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===\n"
42" *\n"
43" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
44" * of this software and associated documentation files (the \"Software\"), to deal\n"
45" * in the Software without restriction, including without limitation the rights\n"
46" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
47" * copies of the Software, and to permit persons to whom the Software is\n"
48" * furnished to do so, subject to the following conditions:\n"
49" *\n"
50" * The above copyright notice and this permission notice shall be included in\n"
51" * all copies or substantial portions of the Software.\n"
52" *\n"
53" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
55" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
56" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
57" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
58" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
59" * THE SOFTWARE.\n"
60" *\n"
61" *===-----------------------------------------------------------------------===\n"
62" */\n"
63"\n"
64"#ifndef __CUDA_BUILTIN_VARS_H\n"
65"#define __CUDA_BUILTIN_VARS_H\n"
66"\n"
67"// Forward declares from vector_types.h.\n"
68"struct uint3;\n"
69"struct dim3;\n"
70"\n"
71"// The file implements built-in CUDA variables using __declspec(property).\n"
72"// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx\n"
73"// All read accesses of built-in variable fields get converted into calls to a\n"
74"// getter function which in turn calls the appropriate builtin to fetch the\n"
75"// value.\n"
76"//\n"
77"// Example:\n"
78"// int x = threadIdx.x;\n"
79"// IR output:\n"
80"// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3\n"
81"// PTX output:\n"
82"// mov.u32 %r2, %tid.x;\n"
83"\n"
84"#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \\\n"
85" __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \\\n"
86" static inline __attribute__((always_inline)) \\\n"
87" __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \\\n"
88" return INTRINSIC; \\\n"
89" }\n"
90"\n"
91"#if __cplusplus >= 201103L\n"
92"#define __DELETE =delete\n"
93"#else\n"
94"#define __DELETE\n"
95"#endif\n"
96"\n"
97"// Make sure nobody can create instances of the special variable types. nvcc\n"
98"// also disallows taking address of special variables, so we disable address-of\n"
99"// operator as well.\n"
100"#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \\\n"
101" __attribute__((device)) TypeName() __DELETE; \\\n"
102" __attribute__((device)) TypeName(const TypeName &) __DELETE; \\\n"
103" __attribute__((device)) void operator=(const TypeName &) const __DELETE; \\\n"
104" __attribute__((device)) TypeName *operator&() const __DELETE\n"
105"\n"
106"struct __cuda_builtin_threadIdx_t {\n"
107" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());\n"
108" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());\n"
109" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());\n"
110" // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
111" // uint3). This function is defined after we pull in vector_types.h.\n"
112" __attribute__((device)) operator uint3() const;\n"
113"private:\n"
114" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);\n"
115"};\n"
116"\n"
117"struct __cuda_builtin_blockIdx_t {\n"
118" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());\n"
119" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());\n"
120" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());\n"
121" // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n"
122" // uint3). This function is defined after we pull in vector_types.h.\n"
123" __attribute__((device)) operator uint3() const;\n"
124"private:\n"
125" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);\n"
126"};\n"
127"\n"
128"struct __cuda_builtin_blockDim_t {\n"
129" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());\n"
130" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());\n"
131" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());\n"
132" // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
133" // dim3). This function is defined after we pull in vector_types.h.\n"
134" __attribute__((device)) operator dim3() const;\n"
135"private:\n"
136" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);\n"
137"};\n"
138"\n"
139"struct __cuda_builtin_gridDim_t {\n"
140" __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());\n"
141" __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());\n"
142" __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());\n"
143" // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a\n"
144" // dim3). This function is defined after we pull in vector_types.h.\n"
145" __attribute__((device)) operator dim3() const;\n"
146"private:\n"
147" __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);\n"
148"};\n"
149"\n"
150"#define __CUDA_BUILTIN_VAR \\\n"
151" extern const __attribute__((device)) __attribute__((weak))\n"
152"__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;\n"
153"__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;\n"
154"__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;\n"
155"__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;\n"
156"\n"
157"// warpSize should translate to read of %WARP_SZ but there's currently no\n"
158"// builtin to do so. According to PTX v4.2 docs 'to date, all target\n"
159"// architectures have a WARP_SZ value of 32'.\n"
160"__attribute__((device)) const int warpSize = 32;\n"
161"\n"
162"#undef __CUDA_DEVICE_BUILTIN\n"
163"#undef __CUDA_BUILTIN_VAR\n"
164"#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS\n"
165"\n"
166"#endif /* __CUDA_BUILTIN_VARS_H */\n"
167"" } ,
168 { "/builtins/__clang_cuda_cmath.h" , "/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===\n"
169" *\n"
170" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
171" * of this software and associated documentation files (the \"Software\"), to deal\n"
172" * in the Software without restriction, including without limitation the rights\n"
173" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
174" * copies of the Software, and to permit persons to whom the Software is\n"
175" * furnished to do so, subject to the following conditions:\n"
176" *\n"
177" * The above copyright notice and this permission notice shall be included in\n"
178" * all copies or substantial portions of the Software.\n"
179" *\n"
180" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
181" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
182" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
183" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
184" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
185" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
186" * THE SOFTWARE.\n"
187" *\n"
188" *===-----------------------------------------------------------------------===\n"
189" */\n"
190"#ifndef __CLANG_CUDA_CMATH_H__\n"
191"#define __CLANG_CUDA_CMATH_H__\n"
192"#ifndef __CUDA__\n"
193"#error \"This file is for CUDA compilation only.\"\n"
194"#endif\n"
195"\n"
196"#include <limits>\n"
197"\n"
198"// CUDA lets us use various std math functions on the device side. This file\n"
199"// works in concert with __clang_cuda_math_forward_declares.h to make this work.\n"
200"//\n"
201"// Specifically, the forward-declares header declares __device__ overloads for\n"
202"// these functions in the global namespace, then pulls them into namespace std\n"
203"// with 'using' statements. Then this file implements those functions, after\n"
204"// their implementations have been pulled in.\n"
205"//\n"
206"// It's important that we declare the functions in the global namespace and pull\n"
207"// them into namespace std with using statements, as opposed to simply declaring\n"
208"// these functions in namespace std, because our device functions need to\n"
209"// overload the standard library functions, which may be declared in the global\n"
210"// namespace or in std, depending on the degree of conformance of the stdlib\n"
211"// implementation. Declaring in the global namespace and pulling into namespace\n"
212"// std covers all of the known knowns.\n"
213"\n"
214"#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))\n"
215"\n"
216"__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }\n"
217"__DEVICE__ long abs(long __n) { return ::labs(__n); }\n"
218"__DEVICE__ float abs(float __x) { return ::fabsf(__x); }\n"
219"__DEVICE__ double abs(double __x) { return ::fabs(__x); }\n"
220"__DEVICE__ float acos(float __x) { return ::acosf(__x); }\n"
221"__DEVICE__ float asin(float __x) { return ::asinf(__x); }\n"
222"__DEVICE__ float atan(float __x) { return ::atanf(__x); }\n"
223"__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }\n"
224"__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }\n"
225"__DEVICE__ float cos(float __x) { return ::cosf(__x); }\n"
226"__DEVICE__ float cosh(float __x) { return ::coshf(__x); }\n"
227"__DEVICE__ float exp(float __x) { return ::expf(__x); }\n"
228"__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }\n"
229"__DEVICE__ float floor(float __x) { return ::floorf(__x); }\n"
230"__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }\n"
231"__DEVICE__ int fpclassify(float __x) {\n"
232" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
233" FP_ZERO, __x);\n"
234"}\n"
235"__DEVICE__ int fpclassify(double __x) {\n"
236" return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n"
237" FP_ZERO, __x);\n"
238"}\n"
239"__DEVICE__ float frexp(float __arg, int *__exp) {\n"
240" return ::frexpf(__arg, __exp);\n"
241"}\n"
242"\n"
243"// For inscrutable reasons, the CUDA headers define these functions for us on\n"
244"// Windows.\n"
245"#ifndef _MSC_VER\n"
246"__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }\n"
247"__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }\n"
248"__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }\n"
249"// For inscrutable reasons, __finite(), the double-precision version of\n"
250"// __finitef, does not exist when compiling for MacOS. __isfinited is available\n"
251"// everywhere and is just as good.\n"
252"__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }\n"
253"__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }\n"
254"__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }\n"
255"#endif\n"
256"\n"
257"__DEVICE__ bool isgreater(float __x, float __y) {\n"
258" return __builtin_isgreater(__x, __y);\n"
259"}\n"
260"__DEVICE__ bool isgreater(double __x, double __y) {\n"
261" return __builtin_isgreater(__x, __y);\n"
262"}\n"
263"__DEVICE__ bool isgreaterequal(float __x, float __y) {\n"
264" return __builtin_isgreaterequal(__x, __y);\n"
265"}\n"
266"__DEVICE__ bool isgreaterequal(double __x, double __y) {\n"
267" return __builtin_isgreaterequal(__x, __y);\n"
268"}\n"
269"__DEVICE__ bool isless(float __x, float __y) {\n"
270" return __builtin_isless(__x, __y);\n"
271"}\n"
272"__DEVICE__ bool isless(double __x, double __y) {\n"
273" return __builtin_isless(__x, __y);\n"
274"}\n"
275"__DEVICE__ bool islessequal(float __x, float __y) {\n"
276" return __builtin_islessequal(__x, __y);\n"
277"}\n"
278"__DEVICE__ bool islessequal(double __x, double __y) {\n"
279" return __builtin_islessequal(__x, __y);\n"
280"}\n"
281"__DEVICE__ bool islessgreater(float __x, float __y) {\n"
282" return __builtin_islessgreater(__x, __y);\n"
283"}\n"
284"__DEVICE__ bool islessgreater(double __x, double __y) {\n"
285" return __builtin_islessgreater(__x, __y);\n"
286"}\n"
287"__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }\n"
288"__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }\n"
289"__DEVICE__ bool isunordered(float __x, float __y) {\n"
290" return __builtin_isunordered(__x, __y);\n"
291"}\n"
292"__DEVICE__ bool isunordered(double __x, double __y) {\n"
293" return __builtin_isunordered(__x, __y);\n"
294"}\n"
295"__DEVICE__ float ldexp(float __arg, int __exp) {\n"
296" return ::ldexpf(__arg, __exp);\n"
297"}\n"
298"__DEVICE__ float log(float __x) { return ::logf(__x); }\n"
299"__DEVICE__ float log10(float __x) { return ::log10f(__x); }\n"
300"__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }\n"
301"__DEVICE__ float pow(float __base, float __exp) {\n"
302" return ::powf(__base, __exp);\n"
303"}\n"
304"__DEVICE__ float pow(float __base, int __iexp) {\n"
305" return ::powif(__base, __iexp);\n"
306"}\n"
307"__DEVICE__ double pow(double __base, int __iexp) {\n"
308" return ::powi(__base, __iexp);\n"
309"}\n"
310"__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }\n"
311"__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }\n"
312"__DEVICE__ float sin(float __x) { return ::sinf(__x); }\n"
313"__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }\n"
314"__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }\n"
315"__DEVICE__ float tan(float __x) { return ::tanf(__x); }\n"
316"__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }\n"
317"\n"
318"// Notably missing above is nexttoward. We omit it because\n"
319"// libdevice doesn't provide an implementation, and we don't want to be in the\n"
320"// business of implementing tricky libm functions in this header.\n"
321"\n"
322"// Now we've defined everything we promised we'd define in\n"
323"// __clang_cuda_math_forward_declares.h. We need to do two additional things to\n"
324"// fix up our math functions.\n"
325"//\n"
326"// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define\n"
327"// only sin(float) and sin(double), which means that e.g. sin(0) is\n"
328"// ambiguous.\n"
329"//\n"
330"// 2) Pull the __device__ overloads of \"foobarf\" math functions into namespace\n"
331"// std. These are defined in the CUDA headers in the global namespace,\n"
332"// independent of everything else we've done here.\n"
333"\n"
334"// We can't use std::enable_if, because we want to be pre-C++11 compatible. But\n"
335"// we go ahead and unconditionally define functions that are only available when\n"
336"// compiling for C++11 to match the behavior of the CUDA headers.\n"
337"template<bool __B, class __T = void>\n"
338"struct __clang_cuda_enable_if {};\n"
339"\n"
340"template <class __T> struct __clang_cuda_enable_if<true, __T> {\n"
341" typedef __T type;\n"
342"};\n"
343"\n"
344"// Defines an overload of __fn that accepts one integral argument, calls\n"
345"// __fn((double)x), and returns __retty.\n"
346"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \\\n"
347" template <typename __T> \\\n"
348" __DEVICE__ \\\n"
349" typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \\\n"
350" __retty>::type \\\n"
351" __fn(__T __x) { \\\n"
352" return ::__fn((double)__x); \\\n"
353" }\n"
354"\n"
355"// Defines an overload of __fn that accepts one two arithmetic arguments, calls\n"
356"// __fn((double)x, (double)y), and returns a double.\n"
357"//\n"
358"// Note this is different from OVERLOAD_1, which generates an overload that\n"
359"// accepts only *integral* arguments.\n"
360"#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \\\n"
361" template <typename __T1, typename __T2> \\\n"
362" __DEVICE__ typename __clang_cuda_enable_if< \\\n"
363" std::numeric_limits<__T1>::is_specialized && \\\n"
364" std::numeric_limits<__T2>::is_specialized, \\\n"
365" __retty>::type \\\n"
366" __fn(__T1 __x, __T2 __y) { \\\n"
367" return __fn((double)__x, (double)__y); \\\n"
368" }\n"
369"\n"
370"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)\n"
371"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)\n"
372"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)\n"
373"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)\n"
374"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)\n"
375"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);\n"
376"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)\n"
377"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)\n"
378"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)\n"
379"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);\n"
380"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)\n"
381"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)\n"
382"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)\n"
383"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)\n"
384"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)\n"
385"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)\n"
386"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)\n"
387"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)\n"
388"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);\n"
389"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)\n"
390"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);\n"
391"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);\n"
392"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);\n"
393"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)\n"
394"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);\n"
395"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)\n"
396"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)\n"
397"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);\n"
398"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);\n"
399"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);\n"
400"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);\n"
401"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);\n"
402"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);\n"
403"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);\n"
404"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)\n"
405"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);\n"
406"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)\n"
407"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)\n"
408"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)\n"
409"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)\n"
410"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)\n"
411"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)\n"
412"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)\n"
413"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)\n"
414"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)\n"
415"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)\n"
416"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);\n"
417"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);\n"
418"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);\n"
419"__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);\n"
420"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);\n"
421"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);\n"
422"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)\n"
423"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)\n"
424"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)\n"
425"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)\n"
426"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)\n"
427"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)\n"
428"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)\n"
429"__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);\n"
430"\n"
431"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1\n"
432"#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2\n"
433"\n"
434"// Overloads for functions that don't match the patterns expected by\n"
435"// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.\n"
436"template <typename __T1, typename __T2, typename __T3>\n"
437"__DEVICE__ typename __clang_cuda_enable_if<\n"
438" std::numeric_limits<__T1>::is_specialized &&\n"
439" std::numeric_limits<__T2>::is_specialized &&\n"
440" std::numeric_limits<__T3>::is_specialized,\n"
441" double>::type\n"
442"fma(__T1 __x, __T2 __y, __T3 __z) {\n"
443" return std::fma((double)__x, (double)__y, (double)__z);\n"
444"}\n"
445"\n"
446"template <typename __T>\n"
447"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
448" double>::type\n"
449"frexp(__T __x, int *__exp) {\n"
450" return std::frexp((double)__x, __exp);\n"
451"}\n"
452"\n"
453"template <typename __T>\n"
454"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
455" double>::type\n"
456"ldexp(__T __x, int __exp) {\n"
457" return std::ldexp((double)__x, __exp);\n"
458"}\n"
459"\n"
460"template <typename __T1, typename __T2>\n"
461"__DEVICE__ typename __clang_cuda_enable_if<\n"
462" std::numeric_limits<__T1>::is_specialized &&\n"
463" std::numeric_limits<__T2>::is_specialized,\n"
464" double>::type\n"
465"remquo(__T1 __x, __T2 __y, int *__quo) {\n"
466" return std::remquo((double)__x, (double)__y, __quo);\n"
467"}\n"
468"\n"
469"template <typename __T>\n"
470"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
471" double>::type\n"
472"scalbln(__T __x, long __exp) {\n"
473" return std::scalbln((double)__x, __exp);\n"
474"}\n"
475"\n"
476"template <typename __T>\n"
477"__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n"
478" double>::type\n"
479"scalbn(__T __x, int __exp) {\n"
480" return std::scalbn((double)__x, __exp);\n"
481"}\n"
482"\n"
483"// We need to define these overloads in exactly the namespace our standard\n"
484"// library uses (including the right inline namespace), otherwise they won't be\n"
485"// picked up by other functions in the standard library (e.g. functions in\n"
486"// <complex>). Thus the ugliness below.\n"
487"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
488"_LIBCPP_BEGIN_NAMESPACE_STD\n"
489"#else\n"
490"namespace std {\n"
491"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
492"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
493"#endif\n"
494"#endif\n"
495"\n"
496"// Pull the new overloads we defined above into namespace std.\n"
497"using ::acos;\n"
498"using ::acosh;\n"
499"using ::asin;\n"
500"using ::asinh;\n"
501"using ::atan;\n"
502"using ::atan2;\n"
503"using ::atanh;\n"
504"using ::cbrt;\n"
505"using ::ceil;\n"
506"using ::copysign;\n"
507"using ::cos;\n"
508"using ::cosh;\n"
509"using ::erf;\n"
510"using ::erfc;\n"
511"using ::exp;\n"
512"using ::exp2;\n"
513"using ::expm1;\n"
514"using ::fabs;\n"
515"using ::fdim;\n"
516"using ::floor;\n"
517"using ::fma;\n"
518"using ::fmax;\n"
519"using ::fmin;\n"
520"using ::fmod;\n"
521"using ::fpclassify;\n"
522"using ::frexp;\n"
523"using ::hypot;\n"
524"using ::ilogb;\n"
525"using ::isfinite;\n"
526"using ::isgreater;\n"
527"using ::isgreaterequal;\n"
528"using ::isless;\n"
529"using ::islessequal;\n"
530"using ::islessgreater;\n"
531"using ::isnormal;\n"
532"using ::isunordered;\n"
533"using ::ldexp;\n"
534"using ::lgamma;\n"
535"using ::llrint;\n"
536"using ::llround;\n"
537"using ::log;\n"
538"using ::log10;\n"
539"using ::log1p;\n"
540"using ::log2;\n"
541"using ::logb;\n"
542"using ::lrint;\n"
543"using ::lround;\n"
544"using ::nearbyint;\n"
545"using ::nextafter;\n"
546"using ::pow;\n"
547"using ::remainder;\n"
548"using ::remquo;\n"
549"using ::rint;\n"
550"using ::round;\n"
551"using ::scalbln;\n"
552"using ::scalbn;\n"
553"using ::signbit;\n"
554"using ::sin;\n"
555"using ::sinh;\n"
556"using ::sqrt;\n"
557"using ::tan;\n"
558"using ::tanh;\n"
559"using ::tgamma;\n"
560"using ::trunc;\n"
561"\n"
562"// Well this is fun: We need to pull these symbols in for libc++, but we can't\n"
563"// pull them in with libstdc++, because its ::isinf and ::isnan are different\n"
564"// than its std::isinf and std::isnan.\n"
565"#ifndef __GLIBCXX__\n"
566"using ::isinf;\n"
567"using ::isnan;\n"
568"#endif\n"
569"\n"
570"// Finally, pull the \"foobarf\" functions that CUDA defines in its headers into\n"
571"// namespace std.\n"
572"using ::acosf;\n"
573"using ::acoshf;\n"
574"using ::asinf;\n"
575"using ::asinhf;\n"
576"using ::atan2f;\n"
577"using ::atanf;\n"
578"using ::atanhf;\n"
579"using ::cbrtf;\n"
580"using ::ceilf;\n"
581"using ::copysignf;\n"
582"using ::cosf;\n"
583"using ::coshf;\n"
584"using ::erfcf;\n"
585"using ::erff;\n"
586"using ::exp2f;\n"
587"using ::expf;\n"
588"using ::expm1f;\n"
589"using ::fabsf;\n"
590"using ::fdimf;\n"
591"using ::floorf;\n"
592"using ::fmaf;\n"
593"using ::fmaxf;\n"
594"using ::fminf;\n"
595"using ::fmodf;\n"
596"using ::frexpf;\n"
597"using ::hypotf;\n"
598"using ::ilogbf;\n"
599"using ::ldexpf;\n"
600"using ::lgammaf;\n"
601"using ::llrintf;\n"
602"using ::llroundf;\n"
603"using ::log10f;\n"
604"using ::log1pf;\n"
605"using ::log2f;\n"
606"using ::logbf;\n"
607"using ::logf;\n"
608"using ::lrintf;\n"
609"using ::lroundf;\n"
610"using ::modff;\n"
611"using ::nearbyintf;\n"
612"using ::nextafterf;\n"
613"using ::powf;\n"
614"using ::remainderf;\n"
615"using ::remquof;\n"
616"using ::rintf;\n"
617"using ::roundf;\n"
618"using ::scalblnf;\n"
619"using ::scalbnf;\n"
620"using ::sinf;\n"
621"using ::sinhf;\n"
622"using ::sqrtf;\n"
623"using ::tanf;\n"
624"using ::tanhf;\n"
625"using ::tgammaf;\n"
626"using ::truncf;\n"
627"\n"
628"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
629"_LIBCPP_END_NAMESPACE_STD\n"
630"#else\n"
631"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
632"_GLIBCXX_END_NAMESPACE_VERSION\n"
633"#endif\n"
634"} // namespace std\n"
635"#endif\n"
636"\n"
637"#undef __DEVICE__\n"
638"\n"
639"#endif\n"
640"" } ,
641 { "/builtins/__clang_cuda_complex_builtins.h" , "/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===\n"
642" *\n"
643" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
644" * of this software and associated documentation files (the \"Software\"), to deal\n"
645" * in the Software without restriction, including without limitation the rights\n"
646" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
647" * copies of the Software, and to permit persons to whom the Software is\n"
648" * furnished to do so, subject to the following conditions:\n"
649" *\n"
650" * The above copyright notice and this permission notice shall be included in\n"
651" * all copies or substantial portions of the Software.\n"
652" *\n"
653" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
654" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
655" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
656" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
657" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
658" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
659" * THE SOFTWARE.\n"
660" *\n"
661" *===-----------------------------------------------------------------------===\n"
662" */\n"
663"\n"
664"#ifndef __CLANG_CUDA_COMPLEX_BUILTINS\n"
665"#define __CLANG_CUDA_COMPLEX_BUILTINS\n"
666"\n"
667"// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are\n"
668"// libgcc functions that clang assumes are available when compiling c99 complex\n"
669"// operations. (These implementations come from libc++, and have been modified\n"
670"// to work with CUDA.)\n"
671"\n"
672"extern \"C\" inline __device__ double _Complex __muldc3(double __a, double __b,\n"
673" double __c, double __d) {\n"
674" double __ac = __a * __c;\n"
675" double __bd = __b * __d;\n"
676" double __ad = __a * __d;\n"
677" double __bc = __b * __c;\n"
678" double _Complex z;\n"
679" __real__(z) = __ac - __bd;\n"
680" __imag__(z) = __ad + __bc;\n"
681" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
682" int __recalc = 0;\n"
683" if (std::isinf(__a) || std::isinf(__b)) {\n"
684" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
685" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
686" if (std::isnan(__c))\n"
687" __c = std::copysign(0, __c);\n"
688" if (std::isnan(__d))\n"
689" __d = std::copysign(0, __d);\n"
690" __recalc = 1;\n"
691" }\n"
692" if (std::isinf(__c) || std::isinf(__d)) {\n"
693" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
694" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
695" if (std::isnan(__a))\n"
696" __a = std::copysign(0, __a);\n"
697" if (std::isnan(__b))\n"
698" __b = std::copysign(0, __b);\n"
699" __recalc = 1;\n"
700" }\n"
701" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
702" std::isinf(__ad) || std::isinf(__bc))) {\n"
703" if (std::isnan(__a))\n"
704" __a = std::copysign(0, __a);\n"
705" if (std::isnan(__b))\n"
706" __b = std::copysign(0, __b);\n"
707" if (std::isnan(__c))\n"
708" __c = std::copysign(0, __c);\n"
709" if (std::isnan(__d))\n"
710" __d = std::copysign(0, __d);\n"
711" __recalc = 1;\n"
712" }\n"
713" if (__recalc) {\n"
714" // Can't use std::numeric_limits<double>::infinity() -- that doesn't have\n"
715" // a device overload (and isn't constexpr before C++11, naturally).\n"
716" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
717" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
718" }\n"
719" }\n"
720" return z;\n"
721"}\n"
722"\n"
723"extern \"C\" inline __device__ float _Complex __mulsc3(float __a, float __b,\n"
724" float __c, float __d) {\n"
725" float __ac = __a * __c;\n"
726" float __bd = __b * __d;\n"
727" float __ad = __a * __d;\n"
728" float __bc = __b * __c;\n"
729" float _Complex z;\n"
730" __real__(z) = __ac - __bd;\n"
731" __imag__(z) = __ad + __bc;\n"
732" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
733" int __recalc = 0;\n"
734" if (std::isinf(__a) || std::isinf(__b)) {\n"
735" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
736" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
737" if (std::isnan(__c))\n"
738" __c = std::copysign(0, __c);\n"
739" if (std::isnan(__d))\n"
740" __d = std::copysign(0, __d);\n"
741" __recalc = 1;\n"
742" }\n"
743" if (std::isinf(__c) || std::isinf(__d)) {\n"
744" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
745" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
746" if (std::isnan(__a))\n"
747" __a = std::copysign(0, __a);\n"
748" if (std::isnan(__b))\n"
749" __b = std::copysign(0, __b);\n"
750" __recalc = 1;\n"
751" }\n"
752" if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n"
753" std::isinf(__ad) || std::isinf(__bc))) {\n"
754" if (std::isnan(__a))\n"
755" __a = std::copysign(0, __a);\n"
756" if (std::isnan(__b))\n"
757" __b = std::copysign(0, __b);\n"
758" if (std::isnan(__c))\n"
759" __c = std::copysign(0, __c);\n"
760" if (std::isnan(__d))\n"
761" __d = std::copysign(0, __d);\n"
762" __recalc = 1;\n"
763" }\n"
764" if (__recalc) {\n"
765" __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n"
766" __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n"
767" }\n"
768" }\n"
769" return z;\n"
770"}\n"
771"\n"
772"extern \"C\" inline __device__ double _Complex __divdc3(double __a, double __b,\n"
773" double __c, double __d) {\n"
774" int __ilogbw = 0;\n"
775" // Can't use std::max, because that's defined in <algorithm>, and we don't\n"
776" // want to pull that in for every compile. The CUDA headers define\n"
777" // ::max(float, float) and ::max(double, double), which is sufficient for us.\n"
778" double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
779" if (std::isfinite(__logbw)) {\n"
780" __ilogbw = (int)__logbw;\n"
781" __c = std::scalbn(__c, -__ilogbw);\n"
782" __d = std::scalbn(__d, -__ilogbw);\n"
783" }\n"
784" double __denom = __c * __c + __d * __d;\n"
785" double _Complex z;\n"
786" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
787" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
788" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
789" if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
790" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
791" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
792" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
793" std::isfinite(__d)) {\n"
794" __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);\n"
795" __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);\n"
796" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
797" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
798" } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&\n"
799" std::isfinite(__b)) {\n"
800" __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);\n"
801" __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);\n"
802" __real__(z) = 0.0 * (__a * __c + __b * __d);\n"
803" __imag__(z) = 0.0 * (__b * __c - __a * __d);\n"
804" }\n"
805" }\n"
806" return z;\n"
807"}\n"
808"\n"
809"extern \"C\" inline __device__ float _Complex __divsc3(float __a, float __b,\n"
810" float __c, float __d) {\n"
811" int __ilogbw = 0;\n"
812" float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n"
813" if (std::isfinite(__logbw)) {\n"
814" __ilogbw = (int)__logbw;\n"
815" __c = std::scalbn(__c, -__ilogbw);\n"
816" __d = std::scalbn(__d, -__ilogbw);\n"
817" }\n"
818" float __denom = __c * __c + __d * __d;\n"
819" float _Complex z;\n"
820" __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n"
821" __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n"
822" if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n"
823" if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {\n"
824" __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n"
825" __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n"
826" } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n"
827" std::isfinite(__d)) {\n"
828" __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n"
829" __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n"
830" __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n"
831" __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n"
832" } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&\n"
833" std::isfinite(__b)) {\n"
834" __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n"
835" __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n"
836" __real__(z) = 0 * (__a * __c + __b * __d);\n"
837" __imag__(z) = 0 * (__b * __c - __a * __d);\n"
838" }\n"
839" }\n"
840" return z;\n"
841"}\n"
842"\n"
843"#endif // __CLANG_CUDA_COMPLEX_BUILTINS\n"
844"" } ,
845 { "/builtins/__clang_cuda_device_functions.h" , "/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===\n"
846" *\n"
847" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
848" * of this software and associated documentation files (the \"Software\"), to deal\n"
849" * in the Software without restriction, including without limitation the rights\n"
850" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
851" * copies of the Software, and to permit persons to whom the Software is\n"
852" * furnished to do so, subject to the following conditions:\n"
853" *\n"
854" * The above copyright notice and this permission notice shall be included in\n"
855" * all copies or substantial portions of the Software.\n"
856" *\n"
857" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
858" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
859" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
860" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
861" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
862" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
863" * THE SOFTWARE.\n"
864" *\n"
865" *===-----------------------------------------------------------------------===\n"
866" */\n"
867"\n"
868"#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
869"#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
870"\n"
871"#if CUDA_VERSION < 9000\n"
872"#error This file is intended to be used with CUDA-9+ only.\n"
873"#endif\n"
874"\n"
875"// __DEVICE__ is a helper macro with common set of attributes for the wrappers\n"
876"// we implement in this file. We need static in order to avoid emitting unused\n"
877"// functions and __forceinline__ helps inlining these wrappers at -O1.\n"
878"#pragma push_macro(\"__DEVICE__\")\n"
879"#define __DEVICE__ static __device__ __forceinline__\n"
880"\n"
881"// libdevice provides fast low precision and slow full-precision implementations\n"
882"// for some functions. Which one gets selected depends on\n"
883"// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if\n"
884"// -ffast-math or -fcuda-approx-transcendentals are in effect.\n"
885"#pragma push_macro(\"__FAST_OR_SLOW\")\n"
886"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
887"#define __FAST_OR_SLOW(fast, slow) fast\n"
888"#else\n"
889"#define __FAST_OR_SLOW(fast, slow) slow\n"
890"#endif\n"
891"\n"
892"__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }\n"
893"__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }\n"
894"__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }\n"
895"__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }\n"
896"__DEVICE__ unsigned long long __brevll(unsigned long long __a) {\n"
897" return __nv_brevll(__a);\n"
898"}\n"
899"__DEVICE__ void __brkpt() { asm volatile(\"brkpt;\"); }\n"
900"__DEVICE__ void __brkpt(int __a) { __brkpt(); }\n"
901"__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,\n"
902" unsigned int __c) {\n"
903" return __nv_byte_perm(__a, __b, __c);\n"
904"}\n"
905"__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }\n"
906"__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }\n"
907"__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }\n"
908"__DEVICE__ double __dAtomicAdd(double *__p, double __v) {\n"
909" return __nvvm_atom_add_gen_d(__p, __v);\n"
910"}\n"
911"__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {\n"
912" return __nvvm_atom_cta_add_gen_d(__p, __v);\n"
913"}\n"
914"__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {\n"
915" return __nvvm_atom_sys_add_gen_d(__p, __v);\n"
916"}\n"
917"__DEVICE__ double __dadd_rd(double __a, double __b) {\n"
918" return __nv_dadd_rd(__a, __b);\n"
919"}\n"
920"__DEVICE__ double __dadd_rn(double __a, double __b) {\n"
921" return __nv_dadd_rn(__a, __b);\n"
922"}\n"
923"__DEVICE__ double __dadd_ru(double __a, double __b) {\n"
924" return __nv_dadd_ru(__a, __b);\n"
925"}\n"
926"__DEVICE__ double __dadd_rz(double __a, double __b) {\n"
927" return __nv_dadd_rz(__a, __b);\n"
928"}\n"
929"__DEVICE__ double __ddiv_rd(double __a, double __b) {\n"
930" return __nv_ddiv_rd(__a, __b);\n"
931"}\n"
932"__DEVICE__ double __ddiv_rn(double __a, double __b) {\n"
933" return __nv_ddiv_rn(__a, __b);\n"
934"}\n"
935"__DEVICE__ double __ddiv_ru(double __a, double __b) {\n"
936" return __nv_ddiv_ru(__a, __b);\n"
937"}\n"
938"__DEVICE__ double __ddiv_rz(double __a, double __b) {\n"
939" return __nv_ddiv_rz(__a, __b);\n"
940"}\n"
941"__DEVICE__ double __dmul_rd(double __a, double __b) {\n"
942" return __nv_dmul_rd(__a, __b);\n"
943"}\n"
944"__DEVICE__ double __dmul_rn(double __a, double __b) {\n"
945" return __nv_dmul_rn(__a, __b);\n"
946"}\n"
947"__DEVICE__ double __dmul_ru(double __a, double __b) {\n"
948" return __nv_dmul_ru(__a, __b);\n"
949"}\n"
950"__DEVICE__ double __dmul_rz(double __a, double __b) {\n"
951" return __nv_dmul_rz(__a, __b);\n"
952"}\n"
953"__DEVICE__ float __double2float_rd(double __a) {\n"
954" return __nv_double2float_rd(__a);\n"
955"}\n"
956"__DEVICE__ float __double2float_rn(double __a) {\n"
957" return __nv_double2float_rn(__a);\n"
958"}\n"
959"__DEVICE__ float __double2float_ru(double __a) {\n"
960" return __nv_double2float_ru(__a);\n"
961"}\n"
962"__DEVICE__ float __double2float_rz(double __a) {\n"
963" return __nv_double2float_rz(__a);\n"
964"}\n"
965"__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }\n"
966"__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }\n"
967"__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }\n"
968"__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }\n"
969"__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }\n"
970"__DEVICE__ long long __double2ll_rd(double __a) {\n"
971" return __nv_double2ll_rd(__a);\n"
972"}\n"
973"__DEVICE__ long long __double2ll_rn(double __a) {\n"
974" return __nv_double2ll_rn(__a);\n"
975"}\n"
976"__DEVICE__ long long __double2ll_ru(double __a) {\n"
977" return __nv_double2ll_ru(__a);\n"
978"}\n"
979"__DEVICE__ long long __double2ll_rz(double __a) {\n"
980" return __nv_double2ll_rz(__a);\n"
981"}\n"
982"__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }\n"
983"__DEVICE__ unsigned int __double2uint_rd(double __a) {\n"
984" return __nv_double2uint_rd(__a);\n"
985"}\n"
986"__DEVICE__ unsigned int __double2uint_rn(double __a) {\n"
987" return __nv_double2uint_rn(__a);\n"
988"}\n"
989"__DEVICE__ unsigned int __double2uint_ru(double __a) {\n"
990" return __nv_double2uint_ru(__a);\n"
991"}\n"
992"__DEVICE__ unsigned int __double2uint_rz(double __a) {\n"
993" return __nv_double2uint_rz(__a);\n"
994"}\n"
995"__DEVICE__ unsigned long long __double2ull_rd(double __a) {\n"
996" return __nv_double2ull_rd(__a);\n"
997"}\n"
998"__DEVICE__ unsigned long long __double2ull_rn(double __a) {\n"
999" return __nv_double2ull_rn(__a);\n"
1000"}\n"
1001"__DEVICE__ unsigned long long __double2ull_ru(double __a) {\n"
1002" return __nv_double2ull_ru(__a);\n"
1003"}\n"
1004"__DEVICE__ unsigned long long __double2ull_rz(double __a) {\n"
1005" return __nv_double2ull_rz(__a);\n"
1006"}\n"
1007"__DEVICE__ long long __double_as_longlong(double __a) {\n"
1008" return __nv_double_as_longlong(__a);\n"
1009"}\n"
1010"__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }\n"
1011"__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }\n"
1012"__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }\n"
1013"__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }\n"
1014"__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }\n"
1015"__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }\n"
1016"__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }\n"
1017"__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }\n"
1018"__DEVICE__ double __dsub_rd(double __a, double __b) {\n"
1019" return __nv_dsub_rd(__a, __b);\n"
1020"}\n"
1021"__DEVICE__ double __dsub_rn(double __a, double __b) {\n"
1022" return __nv_dsub_rn(__a, __b);\n"
1023"}\n"
1024"__DEVICE__ double __dsub_ru(double __a, double __b) {\n"
1025" return __nv_dsub_ru(__a, __b);\n"
1026"}\n"
1027"__DEVICE__ double __dsub_rz(double __a, double __b) {\n"
1028" return __nv_dsub_rz(__a, __b);\n"
1029"}\n"
1030"__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }\n"
1031"__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }\n"
1032"__DEVICE__ float __fAtomicAdd(float *__p, float __v) {\n"
1033" return __nvvm_atom_add_gen_f(__p, __v);\n"
1034"}\n"
1035"__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {\n"
1036" return __nvvm_atom_cta_add_gen_f(__p, __v);\n"
1037"}\n"
1038"__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {\n"
1039" return __nvvm_atom_sys_add_gen_f(__p, __v);\n"
1040"}\n"
1041"__DEVICE__ float __fAtomicExch(float *__p, float __v) {\n"
1042" return __nv_int_as_float(\n"
1043" __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1044"}\n"
1045"__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {\n"
1046" return __nv_int_as_float(\n"
1047" __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1048"}\n"
1049"__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {\n"
1050" return __nv_int_as_float(\n"
1051" __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n"
1052"}\n"
1053"__DEVICE__ float __fadd_rd(float __a, float __b) {\n"
1054" return __nv_fadd_rd(__a, __b);\n"
1055"}\n"
1056"__DEVICE__ float __fadd_rn(float __a, float __b) {\n"
1057" return __nv_fadd_rn(__a, __b);\n"
1058"}\n"
1059"__DEVICE__ float __fadd_ru(float __a, float __b) {\n"
1060" return __nv_fadd_ru(__a, __b);\n"
1061"}\n"
1062"__DEVICE__ float __fadd_rz(float __a, float __b) {\n"
1063" return __nv_fadd_rz(__a, __b);\n"
1064"}\n"
1065"__DEVICE__ float __fdiv_rd(float __a, float __b) {\n"
1066" return __nv_fdiv_rd(__a, __b);\n"
1067"}\n"
1068"__DEVICE__ float __fdiv_rn(float __a, float __b) {\n"
1069" return __nv_fdiv_rn(__a, __b);\n"
1070"}\n"
1071"__DEVICE__ float __fdiv_ru(float __a, float __b) {\n"
1072" return __nv_fdiv_ru(__a, __b);\n"
1073"}\n"
1074"__DEVICE__ float __fdiv_rz(float __a, float __b) {\n"
1075" return __nv_fdiv_rz(__a, __b);\n"
1076"}\n"
1077"__DEVICE__ float __fdividef(float __a, float __b) {\n"
1078" return __nv_fast_fdividef(__a, __b);\n"
1079"}\n"
1080"__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }\n"
1081"__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }\n"
1082"__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }\n"
1083"__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }\n"
1084"__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }\n"
1085"__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }\n"
1086"__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }\n"
1087"__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }\n"
1088"__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }\n"
1089"__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }\n"
1090"__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }\n"
1091"__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }\n"
1092"__DEVICE__ unsigned int __float2uint_rd(float __a) {\n"
1093" return __nv_float2uint_rd(__a);\n"
1094"}\n"
1095"__DEVICE__ unsigned int __float2uint_rn(float __a) {\n"
1096" return __nv_float2uint_rn(__a);\n"
1097"}\n"
1098"__DEVICE__ unsigned int __float2uint_ru(float __a) {\n"
1099" return __nv_float2uint_ru(__a);\n"
1100"}\n"
1101"__DEVICE__ unsigned int __float2uint_rz(float __a) {\n"
1102" return __nv_float2uint_rz(__a);\n"
1103"}\n"
1104"__DEVICE__ unsigned long long __float2ull_rd(float __a) {\n"
1105" return __nv_float2ull_rd(__a);\n"
1106"}\n"
1107"__DEVICE__ unsigned long long __float2ull_rn(float __a) {\n"
1108" return __nv_float2ull_rn(__a);\n"
1109"}\n"
1110"__DEVICE__ unsigned long long __float2ull_ru(float __a) {\n"
1111" return __nv_float2ull_ru(__a);\n"
1112"}\n"
1113"__DEVICE__ unsigned long long __float2ull_rz(float __a) {\n"
1114" return __nv_float2ull_rz(__a);\n"
1115"}\n"
1116"__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }\n"
1117"__DEVICE__ unsigned int __float_as_uint(float __a) {\n"
1118" return __nv_float_as_uint(__a);\n"
1119"}\n"
1120"__DEVICE__ double __fma_rd(double __a, double __b, double __c) {\n"
1121" return __nv_fma_rd(__a, __b, __c);\n"
1122"}\n"
1123"__DEVICE__ double __fma_rn(double __a, double __b, double __c) {\n"
1124" return __nv_fma_rn(__a, __b, __c);\n"
1125"}\n"
1126"__DEVICE__ double __fma_ru(double __a, double __b, double __c) {\n"
1127" return __nv_fma_ru(__a, __b, __c);\n"
1128"}\n"
1129"__DEVICE__ double __fma_rz(double __a, double __b, double __c) {\n"
1130" return __nv_fma_rz(__a, __b, __c);\n"
1131"}\n"
1132"__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {\n"
1133" return __nv_fmaf_ieee_rd(__a, __b, __c);\n"
1134"}\n"
1135"__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {\n"
1136" return __nv_fmaf_ieee_rn(__a, __b, __c);\n"
1137"}\n"
1138"__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {\n"
1139" return __nv_fmaf_ieee_ru(__a, __b, __c);\n"
1140"}\n"
1141"__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {\n"
1142" return __nv_fmaf_ieee_rz(__a, __b, __c);\n"
1143"}\n"
1144"__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {\n"
1145" return __nv_fmaf_rd(__a, __b, __c);\n"
1146"}\n"
1147"__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {\n"
1148" return __nv_fmaf_rn(__a, __b, __c);\n"
1149"}\n"
1150"__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {\n"
1151" return __nv_fmaf_ru(__a, __b, __c);\n"
1152"}\n"
1153"__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {\n"
1154" return __nv_fmaf_rz(__a, __b, __c);\n"
1155"}\n"
1156"__DEVICE__ float __fmul_rd(float __a, float __b) {\n"
1157" return __nv_fmul_rd(__a, __b);\n"
1158"}\n"
1159"__DEVICE__ float __fmul_rn(float __a, float __b) {\n"
1160" return __nv_fmul_rn(__a, __b);\n"
1161"}\n"
1162"__DEVICE__ float __fmul_ru(float __a, float __b) {\n"
1163" return __nv_fmul_ru(__a, __b);\n"
1164"}\n"
1165"__DEVICE__ float __fmul_rz(float __a, float __b) {\n"
1166" return __nv_fmul_rz(__a, __b);\n"
1167"}\n"
1168"__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }\n"
1169"__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }\n"
1170"__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }\n"
1171"__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }\n"
1172"__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }\n"
1173"__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }\n"
1174"__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }\n"
1175"__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }\n"
1176"__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }\n"
1177"__DEVICE__ float __fsub_rd(float __a, float __b) {\n"
1178" return __nv_fsub_rd(__a, __b);\n"
1179"}\n"
1180"__DEVICE__ float __fsub_rn(float __a, float __b) {\n"
1181" return __nv_fsub_rn(__a, __b);\n"
1182"}\n"
1183"__DEVICE__ float __fsub_ru(float __a, float __b) {\n"
1184" return __nv_fsub_ru(__a, __b);\n"
1185"}\n"
1186"__DEVICE__ float __fsub_rz(float __a, float __b) {\n"
1187" return __nv_fsub_rz(__a, __b);\n"
1188"}\n"
1189"__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }\n"
1190"__DEVICE__ double __hiloint2double(int __a, int __b) {\n"
1191" return __nv_hiloint2double(__a, __b);\n"
1192"}\n"
1193"__DEVICE__ int __iAtomicAdd(int *__p, int __v) {\n"
1194" return __nvvm_atom_add_gen_i(__p, __v);\n"
1195"}\n"
1196"__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {\n"
1197" return __nvvm_atom_cta_add_gen_i(__p, __v);\n" // hands the builtin's result back to the caller; the wrapper is declared int
1198"}\n"
1199"__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {\n"
1200" return __nvvm_atom_sys_add_gen_i(__p, __v);\n" // hands the builtin's result back to the caller; the wrapper is declared int
1201"}\n"
1202"__DEVICE__ int __iAtomicAnd(int *__p, int __v) {\n"
1203" return __nvvm_atom_and_gen_i(__p, __v);\n"
1204"}\n"
1205"__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {\n"
1206" return __nvvm_atom_cta_and_gen_i(__p, __v);\n"
1207"}\n"
1208"__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {\n"
1209" return __nvvm_atom_sys_and_gen_i(__p, __v);\n"
1210"}\n"
1211"__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {\n"
1212" return __nvvm_atom_cas_gen_i(__p, __cmp, __v);\n"
1213"}\n"
1214"__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {\n"
1215" return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);\n"
1216"}\n"
1217"__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {\n"
1218" return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);\n"
1219"}\n"
1220"__DEVICE__ int __iAtomicExch(int *__p, int __v) {\n"
1221" return __nvvm_atom_xchg_gen_i(__p, __v);\n"
1222"}\n"
1223"__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {\n"
1224" return __nvvm_atom_cta_xchg_gen_i(__p, __v);\n"
1225"}\n"
1226"__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {\n"
1227" return __nvvm_atom_sys_xchg_gen_i(__p, __v);\n"
1228"}\n"
1229"__DEVICE__ int __iAtomicMax(int *__p, int __v) {\n"
1230" return __nvvm_atom_max_gen_i(__p, __v);\n"
1231"}\n"
1232"__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {\n"
1233" return __nvvm_atom_cta_max_gen_i(__p, __v);\n"
1234"}\n"
1235"__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {\n"
1236" return __nvvm_atom_sys_max_gen_i(__p, __v);\n"
1237"}\n"
1238"__DEVICE__ int __iAtomicMin(int *__p, int __v) {\n"
1239" return __nvvm_atom_min_gen_i(__p, __v);\n"
1240"}\n"
1241"__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {\n"
1242" return __nvvm_atom_cta_min_gen_i(__p, __v);\n"
1243"}\n"
1244"__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {\n"
1245" return __nvvm_atom_sys_min_gen_i(__p, __v);\n"
1246"}\n"
1247"__DEVICE__ int __iAtomicOr(int *__p, int __v) {\n"
1248" return __nvvm_atom_or_gen_i(__p, __v);\n"
1249"}\n"
1250"__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {\n"
1251" return __nvvm_atom_cta_or_gen_i(__p, __v);\n"
1252"}\n"
1253"__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {\n"
1254" return __nvvm_atom_sys_or_gen_i(__p, __v);\n"
1255"}\n"
1256"__DEVICE__ int __iAtomicXor(int *__p, int __v) {\n"
1257" return __nvvm_atom_xor_gen_i(__p, __v);\n"
1258"}\n"
1259"__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {\n"
1260" return __nvvm_atom_cta_xor_gen_i(__p, __v);\n"
1261"}\n"
1262"__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {\n"
1263" return __nvvm_atom_sys_xor_gen_i(__p, __v);\n"
1264"}\n"
1265"__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {\n"
1266" return __nvvm_atom_max_gen_ll(__p, __v);\n"
1267"}\n"
1268"__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {\n"
1269" return __nvvm_atom_cta_max_gen_ll(__p, __v);\n"
1270"}\n"
1271"__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {\n"
1272" return __nvvm_atom_sys_max_gen_ll(__p, __v);\n"
1273"}\n"
1274"__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {\n"
1275" return __nvvm_atom_min_gen_ll(__p, __v);\n"
1276"}\n"
1277"__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {\n"
1278" return __nvvm_atom_cta_min_gen_ll(__p, __v);\n"
1279"}\n"
1280"__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {\n"
1281" return __nvvm_atom_sys_min_gen_ll(__p, __v);\n"
1282"}\n"
1283"__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }\n"
1284"__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }\n"
1285"__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }\n"
1286"__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }\n"
1287"__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }\n"
1288"__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }\n"
1289"__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }\n"
1290"__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }\n"
1291"__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }\n"
1292"__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }\n"
1293"__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }\n"
1294"__DEVICE__ double __ll2double_rd(long long __a) {\n"
1295" return __nv_ll2double_rd(__a);\n"
1296"}\n"
1297"__DEVICE__ double __ll2double_rn(long long __a) {\n"
1298" return __nv_ll2double_rn(__a);\n"
1299"}\n"
1300"__DEVICE__ double __ll2double_ru(long long __a) {\n"
1301" return __nv_ll2double_ru(__a);\n"
1302"}\n"
1303"__DEVICE__ double __ll2double_rz(long long __a) {\n"
1304" return __nv_ll2double_rz(__a);\n"
1305"}\n"
1306"__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }\n"
1307"__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }\n"
1308"__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }\n"
1309"__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }\n"
1310"__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {\n"
1311" return __nvvm_atom_and_gen_ll(__p, __v);\n"
1312"}\n"
1313"__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {\n"
1314" return __nvvm_atom_cta_and_gen_ll(__p, __v);\n"
1315"}\n"
1316"__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {\n"
1317" return __nvvm_atom_sys_and_gen_ll(__p, __v);\n"
1318"}\n"
1319"__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {\n"
1320" return __nvvm_atom_or_gen_ll(__p, __v);\n"
1321"}\n"
1322"__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {\n"
1323" return __nvvm_atom_cta_or_gen_ll(__p, __v);\n"
1324"}\n"
1325"__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {\n"
1326" return __nvvm_atom_sys_or_gen_ll(__p, __v);\n"
1327"}\n"
1328"__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {\n"
1329" return __nvvm_atom_xor_gen_ll(__p, __v);\n"
1330"}\n"
1331"__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {\n"
1332" return __nvvm_atom_cta_xor_gen_ll(__p, __v);\n"
1333"}\n"
1334"__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {\n"
1335" return __nvvm_atom_sys_xor_gen_ll(__p, __v);\n"
1336"}\n"
1337"__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }\n"
1338"__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }\n"
1339"__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }\n"
1340"__DEVICE__ double __longlong_as_double(long long __a) {\n"
1341" return __nv_longlong_as_double(__a);\n"
1342"}\n"
1343"__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }\n"
1344"__DEVICE__ long long __mul64hi(long long __a, long long __b) {\n"
1345" return __nv_mul64hi(__a, __b);\n"
1346"}\n"
1347"__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }\n"
1348"__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }\n"
1349"__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }\n"
1350"__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }\n"
1351"__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }\n"
1352"__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }\n"
1353"__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }\n"
1354"__DEVICE__ float __powf(float __a, float __b) {\n"
1355" return __nv_fast_powf(__a, __b);\n"
1356"}\n"
1357"\n"
1358"// Parameter must have a known integer value.\n"
1359"#define __prof_trigger(__a) asm __volatile__(\"pmevent \\t%0;\" ::\"i\"(__a))\n"
1360"__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }\n"
1361"__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {\n"
1362" return __nv_sad(__a, __b, __c);\n"
1363"}\n"
1364"__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }\n"
1365"__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }\n"
1366"__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }\n"
1367"__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {\n"
1368" return __nv_fast_sincosf(__a, __sptr, __cptr);\n"
1369"}\n"
1370"__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }\n"
1371"__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }\n"
1372"__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }\n"
1373"__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }\n"
1374"__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }\n"
1375"__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }\n"
1376"__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }\n" // wrapper around __nvvm_membar_cta(); no empty declaration after the body
1377"__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }\n" // wrapper around __nvvm_membar_sys(); no empty declaration after the body
1378"__DEVICE__ void __trap(void) { asm volatile(\"trap;\"); }\n"
1379"__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {\n"
1380" return __nvvm_atom_add_gen_i((int *)__p, __v);\n"
1381"}\n"
1382"__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,\n"
1383" unsigned int __v) {\n"
1384" return __nvvm_atom_cta_add_gen_i((int *)__p, __v);\n"
1385"}\n"
1386"__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,\n"
1387" unsigned int __v) {\n"
1388" return __nvvm_atom_sys_add_gen_i((int *)__p, __v);\n"
1389"}\n"
1390"__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {\n"
1391" return __nvvm_atom_and_gen_i((int *)__p, __v);\n"
1392"}\n"
1393"__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,\n"
1394" unsigned int __v) {\n"
1395" return __nvvm_atom_cta_and_gen_i((int *)__p, __v);\n"
1396"}\n"
1397"__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,\n"
1398" unsigned int __v) {\n"
1399" return __nvvm_atom_sys_and_gen_i((int *)__p, __v);\n"
1400"}\n"
1401"__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,\n"
1402" unsigned int __v) {\n"
1403" return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);\n"
1404"}\n"
1405"__DEVICE__ unsigned int\n"
1406"__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1407" return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);\n"
1408"}\n"
1409"__DEVICE__ unsigned int\n"
1410"__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n"
1411" return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);\n"
1412"}\n"
1413"__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {\n"
1414" return __nvvm_atom_dec_gen_ui(__p, __v);\n"
1415"}\n"
1416"__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,\n"
1417" unsigned int __v) {\n"
1418" return __nvvm_atom_cta_dec_gen_ui(__p, __v);\n"
1419"}\n"
1420"__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,\n"
1421" unsigned int __v) {\n"
1422" return __nvvm_atom_sys_dec_gen_ui(__p, __v);\n"
1423"}\n"
1424"__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {\n"
1425" return __nvvm_atom_xchg_gen_i((int *)__p, __v);\n"
1426"}\n"
1427"__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,\n"
1428" unsigned int __v) {\n"
1429" return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);\n"
1430"}\n"
1431"__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,\n"
1432" unsigned int __v) {\n"
1433" return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);\n"
1434"}\n"
1435"__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {\n"
1436" return __nvvm_atom_inc_gen_ui(__p, __v);\n"
1437"}\n"
1438"__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,\n"
1439" unsigned int __v) {\n"
1440" return __nvvm_atom_cta_inc_gen_ui(__p, __v);\n"
1441"}\n"
1442"__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,\n"
1443" unsigned int __v) {\n"
1444" return __nvvm_atom_sys_inc_gen_ui(__p, __v);\n"
1445"}\n"
1446"__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {\n"
1447" return __nvvm_atom_max_gen_ui(__p, __v);\n"
1448"}\n"
1449"__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,\n"
1450" unsigned int __v) {\n"
1451" return __nvvm_atom_cta_max_gen_ui(__p, __v);\n"
1452"}\n"
1453"__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,\n"
1454" unsigned int __v) {\n"
1455" return __nvvm_atom_sys_max_gen_ui(__p, __v);\n"
1456"}\n"
1457"__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {\n"
1458" return __nvvm_atom_min_gen_ui(__p, __v);\n"
1459"}\n"
1460"__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,\n"
1461" unsigned int __v) {\n"
1462" return __nvvm_atom_cta_min_gen_ui(__p, __v);\n"
1463"}\n"
1464"__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,\n"
1465" unsigned int __v) {\n"
1466" return __nvvm_atom_sys_min_gen_ui(__p, __v);\n"
1467"}\n"
1468"__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {\n"
1469" return __nvvm_atom_or_gen_i((int *)__p, __v);\n"
1470"}\n"
1471"__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {\n"
1472" return __nvvm_atom_cta_or_gen_i((int *)__p, __v);\n"
1473"}\n"
1474"__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,\n"
1475" unsigned int __v) {\n"
1476" return __nvvm_atom_sys_or_gen_i((int *)__p, __v);\n"
1477"}\n"
1478"__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {\n"
1479" return __nvvm_atom_xor_gen_i((int *)__p, __v);\n"
1480"}\n"
1481"__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,\n"
1482" unsigned int __v) {\n"
1483" return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);\n"
1484"}\n"
1485"__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,\n"
1486" unsigned int __v) {\n"
1487" return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);\n"
1488"}\n"
1489"__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {\n"
1490" return __nv_uhadd(__a, __b);\n"
1491"}\n"
1492"__DEVICE__ double __uint2double_rn(unsigned int __a) {\n"
1493" return __nv_uint2double_rn(__a);\n"
1494"}\n"
1495"__DEVICE__ float __uint2float_rd(unsigned int __a) {\n"
1496" return __nv_uint2float_rd(__a);\n"
1497"}\n"
1498"__DEVICE__ float __uint2float_rn(unsigned int __a) {\n"
1499" return __nv_uint2float_rn(__a);\n"
1500"}\n"
1501"__DEVICE__ float __uint2float_ru(unsigned int __a) {\n"
1502" return __nv_uint2float_ru(__a);\n"
1503"}\n"
1504"__DEVICE__ float __uint2float_rz(unsigned int __a) {\n"
1505" return __nv_uint2float_rz(__a);\n"
1506"}\n"
1507"__DEVICE__ float __uint_as_float(unsigned int __a) {\n"
1508" return __nv_uint_as_float(__a);\n"
1509"} //\n"
1510"__DEVICE__ double __ull2double_rd(unsigned long long __a) {\n"
1511" return __nv_ull2double_rd(__a);\n"
1512"}\n"
1513"__DEVICE__ double __ull2double_rn(unsigned long long __a) {\n"
1514" return __nv_ull2double_rn(__a);\n"
1515"}\n"
1516"__DEVICE__ double __ull2double_ru(unsigned long long __a) {\n"
1517" return __nv_ull2double_ru(__a);\n"
1518"}\n"
1519"__DEVICE__ double __ull2double_rz(unsigned long long __a) {\n"
1520" return __nv_ull2double_rz(__a);\n"
1521"}\n"
1522"__DEVICE__ float __ull2float_rd(unsigned long long __a) {\n"
1523" return __nv_ull2float_rd(__a);\n"
1524"}\n"
1525"__DEVICE__ float __ull2float_rn(unsigned long long __a) {\n"
1526" return __nv_ull2float_rn(__a);\n"
1527"}\n"
1528"__DEVICE__ float __ull2float_ru(unsigned long long __a) {\n"
1529" return __nv_ull2float_ru(__a);\n"
1530"}\n"
1531"__DEVICE__ float __ull2float_rz(unsigned long long __a) {\n"
1532" return __nv_ull2float_rz(__a);\n"
1533"}\n"
1534"__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,\n"
1535" unsigned long long __v) {\n"
1536" return __nvvm_atom_add_gen_ll((long long *)__p, __v);\n"
1537"}\n"
1538"__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,\n"
1539" unsigned long long __v) {\n"
1540" return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);\n"
1541"}\n"
1542"__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,\n"
1543" unsigned long long __v) {\n"
1544" return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);\n"
1545"}\n"
1546"__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,\n"
1547" unsigned long long __v) {\n"
1548" return __nvvm_atom_and_gen_ll((long long *)__p, __v);\n"
1549"}\n"
1550"__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,\n"
1551" unsigned long long __v) {\n"
1552" return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);\n"
1553"}\n"
1554"__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,\n"
1555" unsigned long long __v) {\n"
1556" return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);\n"
1557"}\n"
1558"__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,\n"
1559" unsigned long long __cmp,\n"
1560" unsigned long long __v) {\n"
1561" return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1562"}\n"
1563"__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,\n"
1564" unsigned long long __cmp,\n"
1565" unsigned long long __v) {\n"
1566" return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1567"}\n"
1568"__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,\n"
1569" unsigned long long __cmp,\n"
1570" unsigned long long __v) {\n"
1571" return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);\n"
1572"}\n"
1573"__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,\n"
1574" unsigned long long __v) {\n"
1575" return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);\n"
1576"}\n"
1577"__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,\n"
1578" unsigned long long __v) {\n"
1579" return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);\n"
1580"}\n"
1581"__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,\n"
1582" unsigned long long __v) {\n"
1583" return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);\n"
1584"}\n"
1585"__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,\n"
1586" unsigned long long __v) {\n"
1587" return __nvvm_atom_max_gen_ull(__p, __v);\n"
1588"}\n"
1589"__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,\n"
1590" unsigned long long __v) {\n"
1591" return __nvvm_atom_cta_max_gen_ull(__p, __v);\n"
1592"}\n"
1593"__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,\n"
1594" unsigned long long __v) {\n"
1595" return __nvvm_atom_sys_max_gen_ull(__p, __v);\n"
1596"}\n"
1597"__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,\n"
1598" unsigned long long __v) {\n"
1599" return __nvvm_atom_min_gen_ull(__p, __v);\n"
1600"}\n"
1601"__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,\n"
1602" unsigned long long __v) {\n"
1603" return __nvvm_atom_cta_min_gen_ull(__p, __v);\n"
1604"}\n"
1605"__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,\n"
1606" unsigned long long __v) {\n"
1607" return __nvvm_atom_sys_min_gen_ull(__p, __v);\n"
1608"}\n"
1609"__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,\n"
1610" unsigned long long __v) {\n"
1611" return __nvvm_atom_or_gen_ll((long long *)__p, __v);\n"
1612"}\n"
1613"__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,\n"
1614" unsigned long long __v) {\n"
1615" return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);\n"
1616"}\n"
1617"__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,\n"
1618" unsigned long long __v) {\n"
1619" return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);\n"
1620"}\n"
1621"__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,\n"
1622" unsigned long long __v) {\n"
1623" return __nvvm_atom_xor_gen_ll((long long *)__p, __v);\n"
1624"}\n"
1625"__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,\n"
1626" unsigned long long __v) {\n"
1627" return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);\n"
1628"}\n"
1629"__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,\n"
1630" unsigned long long __v) {\n"
1631" return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);\n"
1632"}\n"
1633"__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {\n"
1634" return __nv_umul24(__a, __b);\n"
1635"}\n"
1636"__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,\n"
1637" unsigned long long __b) {\n"
1638" return __nv_umul64hi(__a, __b);\n"
1639"}\n"
1640"__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {\n"
1641" return __nv_umulhi(__a, __b);\n"
1642"}\n"
1643"__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {\n"
1644" return __nv_urhadd(__a, __b);\n"
1645"}\n"
1646"__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,\n"
1647" unsigned int __c) {\n"
1648" return __nv_usad(__a, __b, __c);\n"
1649"}\n"
1650"\n"
1651"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
1652"__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }\n"
1653"__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }\n"
1654"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1655" return __nv_vabsdiffs2(__a, __b);\n"
1656"}\n"
1657"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1658" return __nv_vabsdiffs4(__a, __b);\n"
1659"}\n"
1660"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1661" return __nv_vabsdiffu2(__a, __b);\n"
1662"}\n"
1663"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1664" return __nv_vabsdiffu4(__a, __b);\n"
1665"}\n"
1666"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1667" return __nv_vabsss2(__a);\n"
1668"}\n"
1669"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1670" return __nv_vabsss4(__a);\n"
1671"}\n"
1672"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1673" return __nv_vadd2(__a, __b);\n"
1674"}\n"
1675"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1676" return __nv_vadd4(__a, __b);\n"
1677"}\n"
1678"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1679" return __nv_vaddss2(__a, __b);\n"
1680"}\n"
1681"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1682" return __nv_vaddss4(__a, __b);\n"
1683"}\n"
1684"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1685" return __nv_vaddus2(__a, __b);\n"
1686"}\n"
1687"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1688" return __nv_vaddus4(__a, __b);\n"
1689"}\n"
1690"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1691" return __nv_vavgs2(__a, __b);\n"
1692"}\n"
1693"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
1694" return __nv_vavgs4(__a, __b);\n"
1695"}\n"
1696"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
1697" return __nv_vavgu2(__a, __b);\n"
1698"}\n"
1699"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
1700" return __nv_vavgu4(__a, __b);\n"
1701"}\n"
1702"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
1703" return __nv_vcmpeq2(__a, __b);\n"
1704"}\n"
1705"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
1706" return __nv_vcmpeq4(__a, __b);\n"
1707"}\n"
1708"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
1709" return __nv_vcmpges2(__a, __b);\n"
1710"}\n"
1711"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
1712" return __nv_vcmpges4(__a, __b);\n"
1713"}\n"
1714"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
1715" return __nv_vcmpgeu2(__a, __b);\n"
1716"}\n"
1717"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
1718" return __nv_vcmpgeu4(__a, __b);\n"
1719"}\n"
1720"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
1721" return __nv_vcmpgts2(__a, __b);\n"
1722"}\n"
1723"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
1724" return __nv_vcmpgts4(__a, __b);\n"
1725"}\n"
1726"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
1727" return __nv_vcmpgtu2(__a, __b);\n"
1728"}\n"
1729"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
1730" return __nv_vcmpgtu4(__a, __b);\n"
1731"}\n"
1732"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
1733" return __nv_vcmples2(__a, __b);\n"
1734"}\n"
1735"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
1736" return __nv_vcmples4(__a, __b);\n"
1737"}\n"
1738"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
1739" return __nv_vcmpleu2(__a, __b);\n"
1740"}\n"
1741"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
1742" return __nv_vcmpleu4(__a, __b);\n"
1743"}\n"
1744"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
1745" return __nv_vcmplts2(__a, __b);\n"
1746"}\n"
1747"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
1748" return __nv_vcmplts4(__a, __b);\n"
1749"}\n"
1750"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
1751" return __nv_vcmpltu2(__a, __b);\n"
1752"}\n"
1753"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
1754" return __nv_vcmpltu4(__a, __b);\n"
1755"}\n"
1756"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
1757" return __nv_vcmpne2(__a, __b);\n"
1758"}\n"
1759"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
1760" return __nv_vcmpne4(__a, __b);\n"
1761"}\n"
1762"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
1763" return __nv_vhaddu2(__a, __b);\n"
1764"}\n"
1765"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
1766" return __nv_vhaddu4(__a, __b);\n"
1767"}\n"
1768"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
1769" return __nv_vmaxs2(__a, __b);\n"
1770"}\n"
1771"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
1772" return __nv_vmaxs4(__a, __b);\n"
1773"}\n"
1774"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
1775" return __nv_vmaxu2(__a, __b);\n"
1776"}\n"
1777"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
1778" return __nv_vmaxu4(__a, __b);\n"
1779"}\n"
1780"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
1781" return __nv_vmins2(__a, __b);\n"
1782"}\n"
1783"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
1784" return __nv_vmins4(__a, __b);\n"
1785"}\n"
1786"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
1787" return __nv_vminu2(__a, __b);\n"
1788"}\n"
1789"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
1790" return __nv_vminu4(__a, __b);\n"
1791"}\n"
1792"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }\n"
1793"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }\n"
1794"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
1795" return __nv_vnegss2(__a);\n"
1796"}\n"
1797"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
1798" return __nv_vnegss4(__a);\n"
1799"}\n"
1800"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
1801" return __nv_vsads2(__a, __b);\n"
1802"}\n"
1803"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
1804" return __nv_vsads4(__a, __b);\n"
1805"}\n"
1806"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
1807" return __nv_vsadu2(__a, __b);\n"
1808"}\n"
1809"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
1810" return __nv_vsadu4(__a, __b);\n"
1811"}\n"
1812"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
1813" return __nv_vseteq2(__a, __b);\n"
1814"}\n"
1815"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
1816" return __nv_vseteq4(__a, __b);\n"
1817"}\n"
1818"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
1819" return __nv_vsetges2(__a, __b);\n"
1820"}\n"
1821"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
1822" return __nv_vsetges4(__a, __b);\n"
1823"}\n"
1824"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
1825" return __nv_vsetgeu2(__a, __b);\n"
1826"}\n"
1827"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
1828" return __nv_vsetgeu4(__a, __b);\n"
1829"}\n"
1830"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
1831" return __nv_vsetgts2(__a, __b);\n"
1832"}\n"
1833"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
1834" return __nv_vsetgts4(__a, __b);\n"
1835"}\n"
1836"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
1837" return __nv_vsetgtu2(__a, __b);\n"
1838"}\n"
1839"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
1840" return __nv_vsetgtu4(__a, __b);\n"
1841"}\n"
1842"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
1843" return __nv_vsetles2(__a, __b);\n"
1844"}\n"
1845"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
1846" return __nv_vsetles4(__a, __b);\n"
1847"}\n"
1848"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
1849" return __nv_vsetleu2(__a, __b);\n"
1850"}\n"
1851"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
1852" return __nv_vsetleu4(__a, __b);\n"
1853"}\n"
1854"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
1855" return __nv_vsetlts2(__a, __b);\n"
1856"}\n"
1857"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
1858" return __nv_vsetlts4(__a, __b);\n"
1859"}\n"
1860"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
1861" return __nv_vsetltu2(__a, __b);\n"
1862"}\n"
1863"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
1864" return __nv_vsetltu4(__a, __b);\n"
1865"}\n"
1866"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
1867" return __nv_vsetne2(__a, __b);\n"
1868"}\n"
1869"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
1870" return __nv_vsetne4(__a, __b);\n"
1871"}\n"
1872"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
1873" return __nv_vsub2(__a, __b);\n"
1874"}\n"
1875"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
1876" return __nv_vsub4(__a, __b);\n"
1877"}\n"
1878"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
1879" return __nv_vsubss2(__a, __b);\n"
1880"}\n"
1881"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
1882" return __nv_vsubss4(__a, __b);\n"
1883"}\n"
1884"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
1885" return __nv_vsubus2(__a, __b);\n"
1886"}\n"
1887"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
1888" return __nv_vsubus4(__a, __b);\n"
1889"}\n"
1890"#else // CUDA_VERSION >= 9020\n"
1891"// CUDA no longer provides inline assembly (or bitcode) implementation of these\n"
1892"// functions, so we have to reimplment them. The implementation is naive and is\n"
1893"// not optimized for performance.\n"
1894"\n"
1895"// Helper function to convert N-bit boolean subfields into all-0 or all-1.\n"
1896"// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00\n"
1897"// __bool2mask(0x00010000,16) -> 0xffff0000\n"
1898"__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {\n"
1899" return (__a << shift) - __a;\n"
1900"}\n"
1901"__DEVICE__ unsigned int __vabs2(unsigned int __a) {\n"
1902" unsigned int r;\n"
1903" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1904" : \"=r\"(r)\n"
1905" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1906" return r;\n"
1907"}\n"
1908"__DEVICE__ unsigned int __vabs4(unsigned int __a) {\n"
1909" unsigned int r;\n"
1910" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1911" : \"=r\"(r)\n"
1912" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1913" return r;\n"
1914"}\n"
1915"__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n"
1916" unsigned int r;\n"
1917" asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n"
1918" : \"=r\"(r)\n"
1919" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1920" return r;\n"
1921"}\n"
1922"\n"
1923"__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n"
1924" unsigned int r;\n"
1925" asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n"
1926" : \"=r\"(r)\n"
1927" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1928" return r;\n"
1929"}\n"
1930"__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n"
1931" unsigned int r;\n"
1932" asm(\"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;\"\n"
1933" : \"=r\"(r)\n"
1934" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1935" return r;\n"
1936"}\n"
1937"__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n"
1938" unsigned int r;\n"
1939" asm(\"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;\"\n"
1940" : \"=r\"(r)\n"
1941" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1942" return r;\n"
1943"}\n"
1944"__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n"
1945" unsigned int r;\n"
1946" asm(\"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1947" : \"=r\"(r)\n"
1948" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1949" return r;\n"
1950"}\n"
1951"__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n"
1952" unsigned int r;\n"
1953" asm(\"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1954" : \"=r\"(r)\n"
1955" : \"r\"(__a), \"r\"(0), \"r\"(0));\n"
1956" return r;\n"
1957"}\n"
1958"__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n"
1959" unsigned int r;\n"
1960" asm(\"vadd2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1961" return r;\n"
1962"}\n"
1963"__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n"
1964" unsigned int r;\n"
1965" asm(\"vadd4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1966" return r;\n"
1967"}\n"
1968"__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n"
1969" unsigned int r;\n"
1970" asm(\"vadd2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1971" : \"=r\"(r)\n"
1972" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1973" return r;\n"
1974"}\n"
1975"__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n"
1976" unsigned int r;\n"
1977" asm(\"vadd4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
1978" : \"=r\"(r)\n"
1979" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1980" return r;\n"
1981"}\n"
1982"__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n"
1983" unsigned int r;\n"
1984" asm(\"vadd2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1985" : \"=r\"(r)\n"
1986" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1987" return r;\n"
1988"}\n"
1989"__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n"
1990" unsigned int r;\n"
1991" asm(\"vadd4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
1992" : \"=r\"(r)\n"
1993" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1994" return r;\n"
1995"}\n"
1996"__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n"
1997" unsigned int r;\n"
1998" asm(\"vavrg2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
1999" return r;\n"
2000"}\n"
2001"__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n"
2002" unsigned int r;\n"
2003" asm(\"vavrg4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2004" return r;\n"
2005"}\n"
2006"__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n"
2007" unsigned int r;\n"
2008" asm(\"vavrg2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2009" return r;\n"
2010"}\n"
2011"__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n"
2012" unsigned int r;\n"
2013" asm(\"vavrg4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2014" return r;\n"
2015"}\n"
2016"__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n"
2017" unsigned int r;\n"
2018" asm(\"vset2.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2019" return r;\n"
2020"}\n"
2021"__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n"
2022" return __bool2mask(__vseteq2(__a, __b), 16);\n"
2023"}\n"
2024"__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n"
2025" unsigned int r;\n"
2026" asm(\"vset4.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2027" return r;\n"
2028"}\n"
2029"__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n"
2030" return __bool2mask(__vseteq4(__a, __b), 8);\n"
2031"}\n"
2032"__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n"
2033" unsigned int r;\n"
2034" asm(\"vset2.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2035" return r;\n"
2036"}\n"
2037"__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n"
2038" return __bool2mask(__vsetges2(__a, __b), 16);\n"
2039"}\n"
2040"__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n"
2041" unsigned int r;\n"
2042" asm(\"vset4.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2043" return r;\n"
2044"}\n"
2045"__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n"
2046" return __bool2mask(__vsetges4(__a, __b), 8);\n"
2047"}\n"
2048"__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n"
2049" unsigned int r;\n"
2050" asm(\"vset2.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2051" return r;\n"
2052"}\n"
2053"__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n"
2054" return __bool2mask(__vsetgeu2(__a, __b), 16);\n"
2055"}\n"
2056"__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n"
2057" unsigned int r;\n"
2058" asm(\"vset4.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2059" return r;\n"
2060"}\n"
2061"__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n"
2062" return __bool2mask(__vsetgeu4(__a, __b), 8);\n"
2063"}\n"
2064"__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n"
2065" unsigned int r;\n"
2066" asm(\"vset2.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2067" return r;\n"
2068"}\n"
2069"__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n"
2070" return __bool2mask(__vsetgts2(__a, __b), 16);\n"
2071"}\n"
2072"__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n"
2073" unsigned int r;\n"
2074" asm(\"vset4.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2075" return r;\n"
2076"}\n"
2077"__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n"
2078" return __bool2mask(__vsetgts4(__a, __b), 8);\n"
2079"}\n"
2080"__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n"
2081" unsigned int r;\n"
2082" asm(\"vset2.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2083" return r;\n"
2084"}\n"
2085"__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n"
2086" return __bool2mask(__vsetgtu2(__a, __b), 16);\n"
2087"}\n"
2088"__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n"
2089" unsigned int r;\n"
2090" asm(\"vset4.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2091" return r;\n"
2092"}\n"
2093"__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n"
2094" return __bool2mask(__vsetgtu4(__a, __b), 8);\n"
2095"}\n"
2096"__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n"
2097" unsigned int r;\n"
2098" asm(\"vset2.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2099" return r;\n"
2100"}\n"
2101"__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n"
2102" return __bool2mask(__vsetles2(__a, __b), 16);\n"
2103"}\n"
2104"__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n"
2105" unsigned int r;\n"
2106" asm(\"vset4.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2107" return r;\n"
2108"}\n"
2109"__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n"
2110" return __bool2mask(__vsetles4(__a, __b), 8);\n"
2111"}\n"
2112"__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n"
2113" unsigned int r;\n"
2114" asm(\"vset2.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2115" return r;\n"
2116"}\n"
2117"__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n"
2118" return __bool2mask(__vsetleu2(__a, __b), 16);\n"
2119"}\n"
2120"__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n"
2121" unsigned int r;\n"
2122" asm(\"vset4.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2123" return r;\n"
2124"}\n"
2125"__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n"
2126" return __bool2mask(__vsetleu4(__a, __b), 8);\n"
2127"}\n"
2128"__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n"
2129" unsigned int r;\n"
2130" asm(\"vset2.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2131" return r;\n"
2132"}\n"
2133"__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n"
2134" return __bool2mask(__vsetlts2(__a, __b), 16);\n"
2135"}\n"
2136"__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n"
2137" unsigned int r;\n"
2138" asm(\"vset4.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2139" return r;\n"
2140"}\n"
2141"__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n"
2142" return __bool2mask(__vsetlts4(__a, __b), 8);\n"
2143"}\n"
2144"__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n"
2145" unsigned int r;\n"
2146" asm(\"vset2.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2147" return r;\n"
2148"}\n"
2149"__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n"
2150" return __bool2mask(__vsetltu2(__a, __b), 16);\n"
2151"}\n"
2152"__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n"
2153" unsigned int r;\n"
2154" asm(\"vset4.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2155" return r;\n"
2156"}\n"
2157"__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n"
2158" return __bool2mask(__vsetltu4(__a, __b), 8);\n"
2159"}\n"
2160"__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n"
2161" unsigned int r;\n"
2162" asm(\"vset2.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2163" return r;\n"
2164"}\n"
2165"__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n"
2166" return __bool2mask(__vsetne2(__a, __b), 16);\n"
2167"}\n"
2168"__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n"
2169" unsigned int r;\n"
2170" asm(\"vset4.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2171" return r;\n"
2172"}\n"
2173"__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n"
2174" return __bool2mask(__vsetne4(__a, __b), 8);\n"
2175"}\n"
2176"\n"
2177"// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086\n"
2178"// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>\n"
2179"// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)\n"
2180"// To operate on multiple sub-elements we need to make sure to mask out bits\n"
2181"// that crossed over into adjacent elements during the shift.\n"
2182"__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n"
2183" return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);\n"
2184"}\n"
2185"__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n"
2186" return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);\n"
2187"}\n"
2188"\n"
2189"__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n"
2190" unsigned int r;\n"
2191" if ((__a & 0x8000) && (__b & 0x8000)) {\n"
2192" // Work around a bug in ptxas which produces invalid result if low element\n"
2193" // is negative.\n"
2194" unsigned mask = __vcmpgts2(__a, __b);\n"
2195" r = (__a & mask) | (__b & ~mask);\n"
2196" } else {\n"
2197" asm(\"vmax2.s32.s32.s32 %0,%1,%2,%3;\"\n"
2198" : \"=r\"(r)\n"
2199" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2200" }\n"
2201" return r;\n"
2202"}\n"
2203"__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n"
2204" unsigned int r;\n"
2205" asm(\"vmax4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2206" return r;\n"
2207"}\n"
2208"__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n"
2209" unsigned int r;\n"
2210" asm(\"vmax2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2211" return r;\n"
2212"}\n"
2213"__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n"
2214" unsigned int r;\n"
2215" asm(\"vmax4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2216" return r;\n"
2217"}\n"
2218"__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n"
2219" unsigned int r;\n"
2220" asm(\"vmin2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2221" return r;\n"
2222"}\n"
2223"__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n"
2224" unsigned int r;\n"
2225" asm(\"vmin4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2226" return r;\n"
2227"}\n"
2228"__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n"
2229" unsigned int r;\n"
2230" asm(\"vmin2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2231" return r;\n"
2232"}\n"
2233"__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n"
2234" unsigned int r;\n"
2235" asm(\"vmin4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2236" return r;\n"
2237"}\n"
2238"__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n"
2239" unsigned int r;\n"
2240" asm(\"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2241" : \"=r\"(r)\n"
2242" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2243" return r;\n"
2244"}\n"
2245"__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n"
2246" unsigned int r;\n"
2247" asm(\"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;\"\n"
2248" : \"=r\"(r)\n"
2249" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2250" return r;\n"
2251"}\n"
2252"__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n"
2253" unsigned int r;\n"
2254" asm(\"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2255" : \"=r\"(r)\n"
2256" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2257" return r;\n"
2258"}\n"
2259"__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n"
2260" unsigned int r;\n"
2261" asm(\"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;\"\n"
2262" : \"=r\"(r)\n"
2263" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2264" return r;\n"
2265"}\n"
2266"\n"
2267"__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n"
2268" unsigned int r;\n"
2269" asm(\"vsub2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2270" return r;\n"
2271"}\n"
2272"__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }\n"
2273"\n"
2274"__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n"
2275" unsigned int r;\n"
2276" asm(\"vsub4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2277" return r;\n"
2278"}\n"
2279"__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }\n"
2280"__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n"
2281" unsigned int r;\n"
2282" asm(\"vsub2.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2283" : \"=r\"(r)\n"
2284" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2285" return r;\n"
2286"}\n"
2287"__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n"
2288" return __vsubss2(0, __a);\n"
2289"}\n"
2290"__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n"
2291" unsigned int r;\n"
2292" asm(\"vsub4.s32.s32.s32.sat %0,%1,%2,%3;\"\n"
2293" : \"=r\"(r)\n"
2294" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2295" return r;\n"
2296"}\n"
2297"__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n"
2298" return __vsubss4(0, __a);\n"
2299"}\n"
2300"__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n"
2301" unsigned int r;\n"
2302" asm(\"vsub2.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2303" : \"=r\"(r)\n"
2304" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2305" return r;\n"
2306"}\n"
2307"__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n"
2308" unsigned int r;\n"
2309" asm(\"vsub4.u32.u32.u32.sat %0,%1,%2,%3;\"\n"
2310" : \"=r\"(r)\n"
2311" : \"r\"(__a), \"r\"(__b), \"r\"(0));\n"
2312" return r;\n"
2313"}\n"
2314"#endif // CUDA_VERSION >= 9020\n"
2315"__DEVICE__ int abs(int __a) { return __nv_abs(__a); }\n"
2316"__DEVICE__ double acos(double __a) { return __nv_acos(__a); }\n"
2317"__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }\n"
2318"__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }\n"
2319"__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }\n"
2320"__DEVICE__ double asin(double __a) { return __nv_asin(__a); }\n"
2321"__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }\n"
2322"__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }\n"
2323"__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }\n"
2324"__DEVICE__ double atan(double __a) { return __nv_atan(__a); }\n"
2325"__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }\n"
2326"__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }\n"
2327"__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }\n"
2328"__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }\n"
2329"__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }\n"
2330"__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }\n"
2331"__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }\n"
2332"__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }\n"
2333"__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }\n"
2334"__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }\n"
2335"__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }\n"
2336"__DEVICE__ double copysign(double __a, double __b) {\n"
2337" return __nv_copysign(__a, __b);\n"
2338"}\n"
2339"__DEVICE__ float copysignf(float __a, float __b) {\n"
2340" return __nv_copysignf(__a, __b);\n"
2341"}\n"
2342"__DEVICE__ double cos(double __a) { return __nv_cos(__a); }\n"
2343"__DEVICE__ float cosf(float __a) {\n"
2344" return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);\n"
2345"}\n"
2346"__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }\n"
2347"__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }\n"
2348"__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }\n"
2349"__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }\n"
2350"__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }\n"
2351"__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }\n"
2352"__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }\n"
2353"__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }\n"
2354"__DEVICE__ double erf(double __a) { return __nv_erf(__a); }\n"
2355"__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }\n"
2356"__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }\n"
2357"__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }\n"
2358"__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }\n"
2359"__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }\n"
2360"__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }\n"
2361"__DEVICE__ float erff(float __a) { return __nv_erff(__a); }\n"
2362"__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }\n"
2363"__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }\n"
2364"__DEVICE__ double exp(double __a) { return __nv_exp(__a); }\n"
2365"__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }\n"
2366"__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }\n"
2367"__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }\n"
2368"__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }\n"
2369"__DEVICE__ float expf(float __a) { return __nv_expf(__a); }\n"
2370"__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }\n"
2371"__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }\n"
2372"__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }\n"
2373"__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }\n"
2374"__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }\n"
2375"__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }\n"
2376"__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }\n"
2377"__DEVICE__ float fdividef(float __a, float __b) {\n"
2378"#if __FAST_MATH__ && !__CUDA_PREC_DIV\n"
2379" return __nv_fast_fdividef(__a, __b);\n"
2380"#else\n"
2381" return __a / __b;\n"
2382"#endif\n"
2383"}\n"
2384"__DEVICE__ double floor(double __f) { return __nv_floor(__f); }\n"
2385"__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }\n"
2386"__DEVICE__ double fma(double __a, double __b, double __c) {\n"
2387" return __nv_fma(__a, __b, __c);\n"
2388"}\n"
2389"__DEVICE__ float fmaf(float __a, float __b, float __c) {\n"
2390" return __nv_fmaf(__a, __b, __c);\n"
2391"}\n"
2392"__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }\n"
2393"__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }\n"
2394"__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }\n"
2395"__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }\n"
2396"__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }\n"
2397"__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }\n"
2398"__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }\n"
2399"__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }\n"
2400"__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }\n"
2401"__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }\n"
2402"__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }\n"
2403"__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }\n"
2404"__DEVICE__ double j0(double __a) { return __nv_j0(__a); }\n"
2405"__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }\n"
2406"__DEVICE__ double j1(double __a) { return __nv_j1(__a); }\n"
2407"__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }\n"
2408"__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }\n"
2409"__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }\n"
2410"#if defined(__LP64__)\n"
2411"__DEVICE__ long labs(long __a) { return llabs(__a); };\n"
2412"#else\n"
2413"__DEVICE__ long labs(long __a) { return __nv_abs(__a); };\n"
2414"#endif\n"
2415"__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }\n"
2416"__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }\n"
2417"__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }\n"
2418"__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }\n"
2419"__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }\n"
2420"__DEVICE__ long long llmax(long long __a, long long __b) {\n"
2421" return __nv_llmax(__a, __b);\n"
2422"}\n"
2423"__DEVICE__ long long llmin(long long __a, long long __b) {\n"
2424" return __nv_llmin(__a, __b);\n"
2425"}\n"
2426"__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }\n"
2427"__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }\n"
2428"__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }\n"
2429"__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }\n"
2430"__DEVICE__ double log(double __a) { return __nv_log(__a); }\n"
2431"__DEVICE__ double log10(double __a) { return __nv_log10(__a); }\n"
2432"__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }\n"
2433"__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }\n"
2434"__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }\n"
2435"__DEVICE__ double log2(double __a) { return __nv_log2(__a); }\n"
2436"__DEVICE__ float log2f(float __a) {\n"
2437" return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);\n"
2438"}\n"
2439"__DEVICE__ double logb(double __a) { return __nv_logb(__a); }\n"
2440"__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }\n"
2441"__DEVICE__ float logf(float __a) {\n"
2442" return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);\n"
2443"}\n"
2444"#if defined(__LP64__)\n"
2445"__DEVICE__ long lrint(double __a) { return llrint(__a); }\n"
2446"__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }\n"
2447"__DEVICE__ long lround(double __a) { return llround(__a); }\n"
2448"__DEVICE__ long lroundf(float __a) { return llroundf(__a); }\n"
2449"#else\n"
2450"__DEVICE__ long lrint(double __a) { return (long)rint(__a); }\n"
2451"__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }\n"
2452"__DEVICE__ long lround(double __a) { return round(__a); }\n"
2453"__DEVICE__ long lroundf(float __a) { return roundf(__a); }\n"
2454"#endif\n"
2455"__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }\n"
2456"__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {\n"
2457" return __builtin_memcpy(__a, __b, __c);\n"
2458"}\n"
2459"__DEVICE__ void *memset(void *__a, int __b, size_t __c) {\n"
2460" return __builtin_memset(__a, __b, __c);\n"
2461"}\n"
2462"__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }\n"
2463"__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }\n"
2464"__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }\n"
2465"__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }\n"
2466"__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }\n"
2467"__DEVICE__ double nextafter(double __a, double __b) {\n"
2468" return __nv_nextafter(__a, __b);\n"
2469"}\n"
2470"__DEVICE__ float nextafterf(float __a, float __b) {\n"
2471" return __nv_nextafterf(__a, __b);\n"
2472"}\n"
2473"__DEVICE__ double norm(int __dim, const double *__t) {\n"
2474" return __nv_norm(__dim, __t);\n"
2475"}\n"
2476"__DEVICE__ double norm3d(double __a, double __b, double __c) {\n"
2477" return __nv_norm3d(__a, __b, __c);\n"
2478"}\n"
2479"__DEVICE__ float norm3df(float __a, float __b, float __c) {\n"
2480" return __nv_norm3df(__a, __b, __c);\n"
2481"}\n"
2482"__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {\n"
2483" return __nv_norm4d(__a, __b, __c, __d);\n"
2484"}\n"
2485"__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {\n"
2486" return __nv_norm4df(__a, __b, __c, __d);\n"
2487"}\n"
2488"__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }\n"
2489"__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }\n"
2490"__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }\n"
2491"__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }\n"
2492"__DEVICE__ float normf(int __dim, const float *__t) {\n"
2493" return __nv_normf(__dim, __t);\n"
2494"}\n"
2495"__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }\n"
2496"__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }\n"
2497"__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }\n"
2498"__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }\n"
2499"__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }\n"
2500"__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }\n"
2501"__DEVICE__ double remainder(double __a, double __b) {\n"
2502" return __nv_remainder(__a, __b);\n"
2503"}\n"
2504"__DEVICE__ float remainderf(float __a, float __b) {\n"
2505" return __nv_remainderf(__a, __b);\n"
2506"}\n"
2507"__DEVICE__ double remquo(double __a, double __b, int *__c) {\n"
2508" return __nv_remquo(__a, __b, __c);\n"
2509"}\n"
2510"__DEVICE__ float remquof(float __a, float __b, int *__c) {\n"
2511" return __nv_remquof(__a, __b, __c);\n"
2512"}\n"
2513"__DEVICE__ double rhypot(double __a, double __b) {\n"
2514" return __nv_rhypot(__a, __b);\n"
2515"}\n"
2516"__DEVICE__ float rhypotf(float __a, float __b) {\n"
2517" return __nv_rhypotf(__a, __b);\n"
2518"}\n"
2519"__DEVICE__ double rint(double __a) { return __nv_rint(__a); }\n"
2520"__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }\n"
2521"__DEVICE__ double rnorm(int __a, const double *__b) {\n"
2522" return __nv_rnorm(__a, __b);\n"
2523"}\n"
2524"__DEVICE__ double rnorm3d(double __a, double __b, double __c) {\n"
2525" return __nv_rnorm3d(__a, __b, __c);\n"
2526"}\n"
2527"__DEVICE__ float rnorm3df(float __a, float __b, float __c) {\n"
2528" return __nv_rnorm3df(__a, __b, __c);\n"
2529"}\n"
2530"__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {\n"
2531" return __nv_rnorm4d(__a, __b, __c, __d);\n"
2532"}\n"
2533"__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {\n"
2534" return __nv_rnorm4df(__a, __b, __c, __d);\n"
2535"}\n"
2536"__DEVICE__ float rnormf(int __dim, const float *__t) {\n"
2537" return __nv_rnormf(__dim, __t);\n"
2538"}\n"
2539"__DEVICE__ double round(double __a) { return __nv_round(__a); }\n"
2540"__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }\n"
2541"__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }\n"
2542"__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }\n"
2543"__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }\n"
2544"__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }\n"
2545"__DEVICE__ double scalbln(double __a, long __b) {\n"
2546" if (__b > INT_MAX)\n"
2547" return __a > 0 ? HUGE_VAL : -HUGE_VAL;\n"
2548" if (__b < INT_MIN)\n"
2549" return __a > 0 ? 0.0 : -0.0;\n"
2550" return scalbn(__a, (int)__b);\n"
2551"}\n"
2552"__DEVICE__ float scalblnf(float __a, long __b) {\n"
2553" if (__b > INT_MAX)\n"
2554" return __a > 0 ? HUGE_VALF : -HUGE_VALF;\n"
2555" if (__b < INT_MIN)\n"
2556" return __a > 0 ? 0.f : -0.f;\n"
2557" return scalbnf(__a, (int)__b);\n"
2558"}\n"
2559"__DEVICE__ double sin(double __a) { return __nv_sin(__a); }\n"
2560"__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {\n"
2561" return __nv_sincos(__a, __sptr, __cptr);\n"
2562"}\n"
2563"__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {\n"
2564" return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);\n"
2565"}\n"
2566"__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {\n"
2567" return __nv_sincospi(__a, __sptr, __cptr);\n"
2568"}\n"
2569"__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {\n"
2570" return __nv_sincospif(__a, __sptr, __cptr);\n"
2571"}\n"
2572"__DEVICE__ float sinf(float __a) {\n"
2573" return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);\n"
2574"}\n"
2575"__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }\n"
2576"__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }\n"
2577"__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }\n"
2578"__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }\n"
2579"__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }\n"
2580"__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }\n"
2581"__DEVICE__ double tan(double __a) { return __nv_tan(__a); }\n"
2582"__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }\n"
2583"__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }\n"
2584"__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }\n"
2585"__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }\n"
2586"__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }\n"
2587"__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }\n"
2588"__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }\n"
2589"__DEVICE__ unsigned long long ullmax(unsigned long long __a,\n"
2590" unsigned long long __b) {\n"
2591" return __nv_ullmax(__a, __b);\n"
2592"}\n"
2593"__DEVICE__ unsigned long long ullmin(unsigned long long __a,\n"
2594" unsigned long long __b) {\n"
2595" return __nv_ullmin(__a, __b);\n"
2596"}\n"
2597"__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {\n"
2598" return __nv_umax(__a, __b);\n"
2599"}\n"
2600"__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {\n"
2601" return __nv_umin(__a, __b);\n"
2602"}\n"
2603"__DEVICE__ double y0(double __a) { return __nv_y0(__a); }\n"
2604"__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }\n"
2605"__DEVICE__ double y1(double __a) { return __nv_y1(__a); }\n"
2606"__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }\n"
2607"__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }\n"
2608"__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }\n"
2609"\n"
2610"#pragma pop_macro(\"__DEVICE__\")\n"
2611"#pragma pop_macro(\"__FAST_OR_SLOW\")\n"
2612"#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n"
2613"" } ,
2614 { "/builtins/__clang_cuda_intrinsics.h" , "/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===\n"
2615" *\n"
2616" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
2617" * of this software and associated documentation files (the \"Software\"), to deal\n"
2618" * in the Software without restriction, including without limitation the rights\n"
2619" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
2620" * copies of the Software, and to permit persons to whom the Software is\n"
2621" * furnished to do so, subject to the following conditions:\n"
2622" *\n"
2623" * The above copyright notice and this permission notice shall be included in\n"
2624" * all copies or substantial portions of the Software.\n"
2625" *\n"
2626" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
2627" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
2628" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
2629" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
2630" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
2631" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
2632" * THE SOFTWARE.\n"
2633" *\n"
2634" *===-----------------------------------------------------------------------===\n"
2635" */\n"
2636"#ifndef __CLANG_CUDA_INTRINSICS_H__\n"
2637"#define __CLANG_CUDA_INTRINSICS_H__\n"
2638"#ifndef __CUDA__\n"
2639"#error \"This file is for CUDA compilation only.\"\n"
2640"#endif\n"
2641"\n"
2642"// sm_30 intrinsics: __shfl_{up,down,xor}.\n"
2643"\n"
2644"#define __SM_30_INTRINSICS_H__\n"
2645"#define __SM_30_INTRINSICS_HPP__\n"
2646"\n"
2647"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2648"\n"
2649"#pragma push_macro(\"__MAKE_SHUFFLES\")\n"
2650"#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \\\n"
2651" __Type) \\\n"
2652" inline __device__ int __FnName(int __val, __Type __offset, \\\n"
2653" int __width = warpSize) { \\\n"
2654" return __IntIntrinsic(__val, __offset, \\\n"
2655" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2656" } \\\n"
2657" inline __device__ float __FnName(float __val, __Type __offset, \\\n"
2658" int __width = warpSize) { \\\n"
2659" return __FloatIntrinsic(__val, __offset, \\\n"
2660" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2661" } \\\n"
2662" inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \\\n"
2663" int __width = warpSize) { \\\n"
2664" return static_cast<unsigned int>( \\\n"
2665" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2666" } \\\n"
2667" inline __device__ long long __FnName(long long __val, __Type __offset, \\\n"
2668" int __width = warpSize) { \\\n"
2669" struct __Bits { \\\n"
2670" int __a, __b; \\\n"
2671" }; \\\n"
2672" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2673" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2674" __Bits __tmp; \\\n"
2675" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2676" __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \\\n"
2677" __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \\\n"
2678" long long __ret; \\\n"
2679" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2680" return __ret; \\\n"
2681" } \\\n"
2682" inline __device__ long __FnName(long __val, __Type __offset, \\\n"
2683" int __width = warpSize) { \\\n"
2684" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2685" sizeof(long) == sizeof(int)); \\\n"
2686" if (sizeof(long) == sizeof(long long)) { \\\n"
2687" return static_cast<long>( \\\n"
2688" ::__FnName(static_cast<long long>(__val), __offset, __width)); \\\n"
2689" } else if (sizeof(long) == sizeof(int)) { \\\n"
2690" return static_cast<long>( \\\n"
2691" ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n"
2692" } \\\n"
2693" } \\\n"
2694" inline __device__ unsigned long __FnName( \\\n"
2695" unsigned long __val, __Type __offset, int __width = warpSize) { \\\n"
2696" return static_cast<unsigned long>( \\\n"
2697" ::__FnName(static_cast<long>(__val), __offset, __width)); \\\n"
2698" } \\\n"
2699" inline __device__ unsigned long long __FnName( \\\n"
2700" unsigned long long __val, __Type __offset, int __width = warpSize) { \\\n"
2701" return static_cast<unsigned long long>(::__FnName( \\\n"
2702" static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2703" } \\\n"
2704" inline __device__ double __FnName(double __val, __Type __offset, \\\n"
2705" int __width = warpSize) { \\\n"
2706" long long __tmp; \\\n"
2707" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2708" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2709" __tmp = ::__FnName(__tmp, __offset, __width); \\\n"
2710" double __ret; \\\n"
2711" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2712" return __ret; \\\n"
2713" }\n"
2714"\n"
2715"__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);\n"
2716"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2717"// maxLane.\n"
2718"__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,\n"
2719" unsigned int);\n"
2720"__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,\n"
2721" unsigned int);\n"
2722"__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,\n"
2723" int);\n"
2724"#pragma pop_macro(\"__MAKE_SHUFFLES\")\n"
2725"\n"
2726"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2727"\n"
2728"#if CUDA_VERSION >= 9000\n"
2729"#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)\n"
2730"// __shfl_sync_* variants available in CUDA-9\n"
2731"#pragma push_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2732"#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \\\n"
2733" __Mask, __Type) \\\n"
2734" inline __device__ int __FnName(unsigned int __mask, int __val, \\\n"
2735" __Type __offset, int __width = warpSize) { \\\n"
2736" return __IntIntrinsic(__mask, __val, __offset, \\\n"
2737" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2738" } \\\n"
2739" inline __device__ float __FnName(unsigned int __mask, float __val, \\\n"
2740" __Type __offset, int __width = warpSize) { \\\n"
2741" return __FloatIntrinsic(__mask, __val, __offset, \\\n"
2742" ((warpSize - __width) << 8) | (__Mask)); \\\n"
2743" } \\\n"
2744" inline __device__ unsigned int __FnName(unsigned int __mask, \\\n"
2745" unsigned int __val, __Type __offset, \\\n"
2746" int __width = warpSize) { \\\n"
2747" return static_cast<unsigned int>( \\\n"
2748" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2749" } \\\n"
2750" inline __device__ long long __FnName(unsigned int __mask, long long __val, \\\n"
2751" __Type __offset, \\\n"
2752" int __width = warpSize) { \\\n"
2753" struct __Bits { \\\n"
2754" int __a, __b; \\\n"
2755" }; \\\n"
2756" _Static_assert(sizeof(__val) == sizeof(__Bits)); \\\n"
2757" _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \\\n"
2758" __Bits __tmp; \\\n"
2759" memcpy(&__val, &__tmp, sizeof(__val)); \\\n"
2760" __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \\\n"
2761" __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \\\n"
2762" long long __ret; \\\n"
2763" memcpy(&__ret, &__tmp, sizeof(__tmp)); \\\n"
2764" return __ret; \\\n"
2765" } \\\n"
2766" inline __device__ unsigned long long __FnName( \\\n"
2767" unsigned int __mask, unsigned long long __val, __Type __offset, \\\n"
2768" int __width = warpSize) { \\\n"
2769" return static_cast<unsigned long long>(::__FnName( \\\n"
2770" __mask, static_cast<unsigned long long>(__val), __offset, __width)); \\\n"
2771" } \\\n"
2772" inline __device__ long __FnName(unsigned int __mask, long __val, \\\n"
2773" __Type __offset, int __width = warpSize) { \\\n"
2774" _Static_assert(sizeof(long) == sizeof(long long) || \\\n"
2775" sizeof(long) == sizeof(int)); \\\n"
2776" if (sizeof(long) == sizeof(long long)) { \\\n"
2777" return static_cast<long>(::__FnName( \\\n"
2778" __mask, static_cast<long long>(__val), __offset, __width)); \\\n"
2779" } else if (sizeof(long) == sizeof(int)) { \\\n"
2780" return static_cast<long>( \\\n"
2781" ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n"
2782" } \\\n"
2783" } \\\n"
2784" inline __device__ unsigned long __FnName( \\\n"
2785" unsigned int __mask, unsigned long __val, __Type __offset, \\\n"
2786" int __width = warpSize) { \\\n"
2787" return static_cast<unsigned long>( \\\n"
2788" ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \\\n"
2789" } \\\n"
2790" inline __device__ double __FnName(unsigned int __mask, double __val, \\\n"
2791" __Type __offset, int __width = warpSize) { \\\n"
2792" long long __tmp; \\\n"
2793" _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n"
2794" memcpy(&__tmp, &__val, sizeof(__val)); \\\n"
2795" __tmp = ::__FnName(__mask, __tmp, __offset, __width); \\\n"
2796" double __ret; \\\n"
2797" memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n"
2798" return __ret; \\\n"
2799" }\n"
2800"__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,\n"
2801" __nvvm_shfl_sync_idx_f32, 0x1f, int);\n"
2802"// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n"
2803"// maxLane.\n"
2804"__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,\n"
2805" __nvvm_shfl_sync_up_f32, 0, unsigned int);\n"
2806"__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,\n"
2807" __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);\n"
2808"__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,\n"
2809" __nvvm_shfl_sync_bfly_f32, 0x1f, int);\n"
2810"#pragma pop_macro(\"__MAKE_SYNC_SHUFFLES\")\n"
2811"\n"
2812"inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {\n"
2813" return __nvvm_bar_warp_sync(mask);\n"
2814"}\n"
2815"\n"
2816"inline __device__ void __barrier_sync(unsigned int id) {\n"
2817" __nvvm_barrier_sync(id);\n"
2818"}\n"
2819"\n"
2820"inline __device__ void __barrier_sync_count(unsigned int id,\n"
2821" unsigned int count) {\n"
2822" __nvvm_barrier_sync_cnt(id, count);\n"
2823"}\n"
2824"\n"
2825"inline __device__ int __all_sync(unsigned int mask, int pred) {\n"
2826" return __nvvm_vote_all_sync(mask, pred);\n"
2827"}\n"
2828"\n"
2829"inline __device__ int __any_sync(unsigned int mask, int pred) {\n"
2830" return __nvvm_vote_any_sync(mask, pred);\n"
2831"}\n"
2832"\n"
2833"inline __device__ int __uni_sync(unsigned int mask, int pred) {\n"
2834" return __nvvm_vote_uni_sync(mask, pred);\n"
2835"}\n"
2836"\n"
2837"inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {\n"
2838" return __nvvm_vote_ballot_sync(mask, pred);\n"
2839"}\n"
2840"\n"
2841"inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }\n"
2842"\n"
2843"inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {\n"
2844" return __nvvm_fns(mask, base, offset);\n"
2845"}\n"
2846"\n"
2847"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n"
2848"\n"
2849"// Define __match* builtins CUDA-9 headers expect to see.\n"
2850"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2851"inline __device__ unsigned int __match32_any_sync(unsigned int mask,\n"
2852" unsigned int value) {\n"
2853" return __nvvm_match_any_sync_i32(mask, value);\n"
2854"}\n"
2855"\n"
2856"inline __device__ unsigned long long\n"
2857"__match64_any_sync(unsigned int mask, unsigned long long value) {\n"
2858" return __nvvm_match_any_sync_i64(mask, value);\n"
2859"}\n"
2860"\n"
2861"inline __device__ unsigned int\n"
2862"__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {\n"
2863" return __nvvm_match_all_sync_i32p(mask, value, pred);\n"
2864"}\n"
2865"\n"
2866"inline __device__ unsigned long long\n"
2867"__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {\n"
2868" return __nvvm_match_all_sync_i64p(mask, value, pred);\n"
2869"}\n"
2870"#include \"crt/sm_70_rt.hpp\"\n"
2871"\n"
2872"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n"
2873"#endif // __CUDA_VERSION >= 9000\n"
2874"\n"
2875"// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.\n"
2876"\n"
2877"// Prevent the vanilla sm_32 intrinsics header from being included.\n"
2878"#define __SM_32_INTRINSICS_H__\n"
2879"#define __SM_32_INTRINSICS_HPP__\n"
2880"\n"
2881"#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
2882"\n"
2883"inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }\n"
2884"inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }\n"
2885"inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }\n"
2886"inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }\n"
2887"inline __device__ long long __ldg(const long long *ptr) {\n"
2888" return __nvvm_ldg_ll(ptr);\n"
2889"}\n"
2890"inline __device__ unsigned char __ldg(const unsigned char *ptr) {\n"
2891" return __nvvm_ldg_uc(ptr);\n"
2892"}\n"
2893"inline __device__ signed char __ldg(const signed char *ptr) {\n"
2894" return __nvvm_ldg_uc((const unsigned char *)ptr);\n"
2895"}\n"
2896"inline __device__ unsigned short __ldg(const unsigned short *ptr) {\n"
2897" return __nvvm_ldg_us(ptr);\n"
2898"}\n"
2899"inline __device__ unsigned int __ldg(const unsigned int *ptr) {\n"
2900" return __nvvm_ldg_ui(ptr);\n"
2901"}\n"
2902"inline __device__ unsigned long __ldg(const unsigned long *ptr) {\n"
2903" return __nvvm_ldg_ul(ptr);\n"
2904"}\n"
2905"inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {\n"
2906" return __nvvm_ldg_ull(ptr);\n"
2907"}\n"
2908"inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }\n"
2909"inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }\n"
2910"\n"
2911"inline __device__ char2 __ldg(const char2 *ptr) {\n"
2912" typedef char c2 __attribute__((ext_vector_type(2)));\n"
2913" // We can assume that ptr is aligned at least to char2's alignment, but the\n"
2914" // load will assume that ptr is aligned to char2's alignment. This is only\n"
2915" // safe if alignof(c2) <= alignof(char2).\n"
2916" c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));\n"
2917" char2 ret;\n"
2918" ret.x = rv[0];\n"
2919" ret.y = rv[1];\n"
2920" return ret;\n"
2921"}\n"
2922"inline __device__ char4 __ldg(const char4 *ptr) {\n"
2923" typedef char c4 __attribute__((ext_vector_type(4)));\n"
2924" c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));\n"
2925" char4 ret;\n"
2926" ret.x = rv[0];\n"
2927" ret.y = rv[1];\n"
2928" ret.z = rv[2];\n"
2929" ret.w = rv[3];\n"
2930" return ret;\n"
2931"}\n"
2932"inline __device__ short2 __ldg(const short2 *ptr) {\n"
2933" typedef short s2 __attribute__((ext_vector_type(2)));\n"
2934" s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));\n"
2935" short2 ret;\n"
2936" ret.x = rv[0];\n"
2937" ret.y = rv[1];\n"
2938" return ret;\n"
2939"}\n"
2940"inline __device__ short4 __ldg(const short4 *ptr) {\n"
2941" typedef short s4 __attribute__((ext_vector_type(4)));\n"
2942" s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));\n"
2943" short4 ret;\n"
2944" ret.x = rv[0];\n"
2945" ret.y = rv[1];\n"
2946" ret.z = rv[2];\n"
2947" ret.w = rv[3];\n"
2948" return ret;\n"
2949"}\n"
2950"inline __device__ int2 __ldg(const int2 *ptr) {\n"
2951" typedef int i2 __attribute__((ext_vector_type(2)));\n"
2952" i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));\n"
2953" int2 ret;\n"
2954" ret.x = rv[0];\n"
2955" ret.y = rv[1];\n"
2956" return ret;\n"
2957"}\n"
2958"inline __device__ int4 __ldg(const int4 *ptr) {\n"
2959" typedef int i4 __attribute__((ext_vector_type(4)));\n"
2960" i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));\n"
2961" int4 ret;\n"
2962" ret.x = rv[0];\n"
2963" ret.y = rv[1];\n"
2964" ret.z = rv[2];\n"
2965" ret.w = rv[3];\n"
2966" return ret;\n"
2967"}\n"
2968"inline __device__ longlong2 __ldg(const longlong2 *ptr) {\n"
2969" typedef long long ll2 __attribute__((ext_vector_type(2)));\n"
2970" ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));\n"
2971" longlong2 ret;\n"
2972" ret.x = rv[0];\n"
2973" ret.y = rv[1];\n"
2974" return ret;\n"
2975"}\n"
2976"\n"
2977"inline __device__ uchar2 __ldg(const uchar2 *ptr) {\n"
2978" typedef unsigned char uc2 __attribute__((ext_vector_type(2)));\n"
2979" uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));\n"
2980" uchar2 ret;\n"
2981" ret.x = rv[0];\n"
2982" ret.y = rv[1];\n"
2983" return ret;\n"
2984"}\n"
2985"inline __device__ uchar4 __ldg(const uchar4 *ptr) {\n"
2986" typedef unsigned char uc4 __attribute__((ext_vector_type(4)));\n"
2987" uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));\n"
2988" uchar4 ret;\n"
2989" ret.x = rv[0];\n"
2990" ret.y = rv[1];\n"
2991" ret.z = rv[2];\n"
2992" ret.w = rv[3];\n"
2993" return ret;\n"
2994"}\n"
2995"inline __device__ ushort2 __ldg(const ushort2 *ptr) {\n"
2996" typedef unsigned short us2 __attribute__((ext_vector_type(2)));\n"
2997" us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));\n"
2998" ushort2 ret;\n"
2999" ret.x = rv[0];\n"
3000" ret.y = rv[1];\n"
3001" return ret;\n"
3002"}\n"
3003"inline __device__ ushort4 __ldg(const ushort4 *ptr) {\n"
3004" typedef unsigned short us4 __attribute__((ext_vector_type(4)));\n"
3005" us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));\n"
3006" ushort4 ret;\n"
3007" ret.x = rv[0];\n"
3008" ret.y = rv[1];\n"
3009" ret.z = rv[2];\n"
3010" ret.w = rv[3];\n"
3011" return ret;\n"
3012"}\n"
3013"inline __device__ uint2 __ldg(const uint2 *ptr) {\n"
3014" typedef unsigned int ui2 __attribute__((ext_vector_type(2)));\n"
3015" ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));\n"
3016" uint2 ret;\n"
3017" ret.x = rv[0];\n"
3018" ret.y = rv[1];\n"
3019" return ret;\n"
3020"}\n"
3021"inline __device__ uint4 __ldg(const uint4 *ptr) {\n"
3022" typedef unsigned int ui4 __attribute__((ext_vector_type(4)));\n"
3023" ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));\n"
3024" uint4 ret;\n"
3025" ret.x = rv[0];\n"
3026" ret.y = rv[1];\n"
3027" ret.z = rv[2];\n"
3028" ret.w = rv[3];\n"
3029" return ret;\n"
3030"}\n"
3031"inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {\n"
3032" typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));\n"
3033" ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));\n"
3034" ulonglong2 ret;\n"
3035" ret.x = rv[0];\n"
3036" ret.y = rv[1];\n"
3037" return ret;\n"
3038"}\n"
3039"\n"
3040"inline __device__ float2 __ldg(const float2 *ptr) {\n"
3041" typedef float f2 __attribute__((ext_vector_type(2)));\n"
3042" f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));\n"
3043" float2 ret;\n"
3044" ret.x = rv[0];\n"
3045" ret.y = rv[1];\n"
3046" return ret;\n"
3047"}\n"
3048"inline __device__ float4 __ldg(const float4 *ptr) {\n"
3049" typedef float f4 __attribute__((ext_vector_type(4)));\n"
3050" f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));\n"
3051" float4 ret;\n"
3052" ret.x = rv[0];\n"
3053" ret.y = rv[1];\n"
3054" ret.z = rv[2];\n"
3055" ret.w = rv[3];\n"
3056" return ret;\n"
3057"}\n"
3058"inline __device__ double2 __ldg(const double2 *ptr) {\n"
3059" typedef double d2 __attribute__((ext_vector_type(2)));\n"
3060" d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));\n"
3061" double2 ret;\n"
3062" ret.x = rv[0];\n"
3063" ret.y = rv[1];\n"
3064" return ret;\n"
3065"}\n"
3066"\n"
3067"// TODO: Implement these as intrinsics, so the backend can work its magic on\n"
3068"// these. Alternatively, we could implement these as plain C and try to get\n"
3069"// llvm to recognize the relevant patterns.\n"
3070"inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,\n"
3071" unsigned shiftWidth) {\n"
3072" unsigned result;\n"
3073" asm(\"shf.l.wrap.b32 %0, %1, %2, %3;\"\n"
3074" : \"=r\"(result)\n"
3075" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3076" return result;\n"
3077"}\n"
3078"inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,\n"
3079" unsigned shiftWidth) {\n"
3080" unsigned result;\n"
3081" asm(\"shf.l.clamp.b32 %0, %1, %2, %3;\"\n"
3082" : \"=r\"(result)\n"
3083" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3084" return result;\n"
3085"}\n"
3086"inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,\n"
3087" unsigned shiftWidth) {\n"
3088" unsigned result;\n"
3089" asm(\"shf.r.wrap.b32 %0, %1, %2, %3;\"\n"
3090" : \"=r\"(result)\n"
3091" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3092" return result;\n"
3093"}\n"
3094"inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,\n"
3095" unsigned shiftWidth) {\n"
3096" unsigned ret;\n"
3097" asm(\"shf.r.clamp.b32 %0, %1, %2, %3;\"\n"
3098" : \"=r\"(ret)\n"
3099" : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n"
3100" return ret;\n"
3101"}\n"
3102"\n"
3103"#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n"
3104"\n"
3105"#endif // defined(__CLANG_CUDA_INTRINSICS_H__)\n"
3106"" } ,
3107 { "/builtins/__clang_cuda_libdevice_declares.h" , "/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===\n"
3108" *\n"
3109" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3110" * of this software and associated documentation files (the \"Software\"), to deal\n"
3111" * in the Software without restriction, including without limitation the rights\n"
3112" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3113" * copies of the Software, and to permit persons to whom the Software is\n"
3114" * furnished to do so, subject to the following conditions:\n"
3115" *\n"
3116" * The above copyright notice and this permission notice shall be included in\n"
3117" * all copies or substantial portions of the Software.\n"
3118" *\n"
3119" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3120" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3121" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3122" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3123" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3124" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3125" * THE SOFTWARE.\n"
3126" *\n"
3127" *===-----------------------------------------------------------------------===\n"
3128" */\n"
3129"\n"
3130"#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3131"#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3132"\n"
3133"extern \"C\" {\n"
3134"\n"
3135"__device__ int __nv_abs(int __a);\n"
3136"__device__ double __nv_acos(double __a);\n"
3137"__device__ float __nv_acosf(float __a);\n"
3138"__device__ double __nv_acosh(double __a);\n"
3139"__device__ float __nv_acoshf(float __a);\n"
3140"__device__ double __nv_asin(double __a);\n"
3141"__device__ float __nv_asinf(float __a);\n"
3142"__device__ double __nv_asinh(double __a);\n"
3143"__device__ float __nv_asinhf(float __a);\n"
3144"__device__ double __nv_atan2(double __a, double __b);\n"
3145"__device__ float __nv_atan2f(float __a, float __b);\n"
3146"__device__ double __nv_atan(double __a);\n"
3147"__device__ float __nv_atanf(float __a);\n"
3148"__device__ double __nv_atanh(double __a);\n"
3149"__device__ float __nv_atanhf(float __a);\n"
3150"__device__ int __nv_brev(int __a);\n"
3151"__device__ long long __nv_brevll(long long __a);\n"
3152"__device__ int __nv_byte_perm(int __a, int __b, int __c);\n"
3153"__device__ double __nv_cbrt(double __a);\n"
3154"__device__ float __nv_cbrtf(float __a);\n"
3155"__device__ double __nv_ceil(double __a);\n"
3156"__device__ float __nv_ceilf(float __a);\n"
3157"__device__ int __nv_clz(int __a);\n"
3158"__device__ int __nv_clzll(long long __a);\n"
3159"__device__ double __nv_copysign(double __a, double __b);\n"
3160"__device__ float __nv_copysignf(float __a, float __b);\n"
3161"__device__ double __nv_cos(double __a);\n"
3162"__device__ float __nv_cosf(float __a);\n"
3163"__device__ double __nv_cosh(double __a);\n"
3164"__device__ float __nv_coshf(float __a);\n"
3165"__device__ double __nv_cospi(double __a);\n"
3166"__device__ float __nv_cospif(float __a);\n"
3167"__device__ double __nv_cyl_bessel_i0(double __a);\n"
3168"__device__ float __nv_cyl_bessel_i0f(float __a);\n"
3169"__device__ double __nv_cyl_bessel_i1(double __a);\n"
3170"__device__ float __nv_cyl_bessel_i1f(float __a);\n"
3171"__device__ double __nv_dadd_rd(double __a, double __b);\n"
3172"__device__ double __nv_dadd_rn(double __a, double __b);\n"
3173"__device__ double __nv_dadd_ru(double __a, double __b);\n"
3174"__device__ double __nv_dadd_rz(double __a, double __b);\n"
3175"__device__ double __nv_ddiv_rd(double __a, double __b);\n"
3176"__device__ double __nv_ddiv_rn(double __a, double __b);\n"
3177"__device__ double __nv_ddiv_ru(double __a, double __b);\n"
3178"__device__ double __nv_ddiv_rz(double __a, double __b);\n"
3179"__device__ double __nv_dmul_rd(double __a, double __b);\n"
3180"__device__ double __nv_dmul_rn(double __a, double __b);\n"
3181"__device__ double __nv_dmul_ru(double __a, double __b);\n"
3182"__device__ double __nv_dmul_rz(double __a, double __b);\n"
3183"__device__ float __nv_double2float_rd(double __a);\n"
3184"__device__ float __nv_double2float_rn(double __a);\n"
3185"__device__ float __nv_double2float_ru(double __a);\n"
3186"__device__ float __nv_double2float_rz(double __a);\n"
3187"__device__ int __nv_double2hiint(double __a);\n"
3188"__device__ int __nv_double2int_rd(double __a);\n"
3189"__device__ int __nv_double2int_rn(double __a);\n"
3190"__device__ int __nv_double2int_ru(double __a);\n"
3191"__device__ int __nv_double2int_rz(double __a);\n"
3192"__device__ long long __nv_double2ll_rd(double __a);\n"
3193"__device__ long long __nv_double2ll_rn(double __a);\n"
3194"__device__ long long __nv_double2ll_ru(double __a);\n"
3195"__device__ long long __nv_double2ll_rz(double __a);\n"
3196"__device__ int __nv_double2loint(double __a);\n"
3197"__device__ unsigned int __nv_double2uint_rd(double __a);\n"
3198"__device__ unsigned int __nv_double2uint_rn(double __a);\n"
3199"__device__ unsigned int __nv_double2uint_ru(double __a);\n"
3200"__device__ unsigned int __nv_double2uint_rz(double __a);\n"
3201"__device__ unsigned long long __nv_double2ull_rd(double __a);\n"
3202"__device__ unsigned long long __nv_double2ull_rn(double __a);\n"
3203"__device__ unsigned long long __nv_double2ull_ru(double __a);\n"
3204"__device__ unsigned long long __nv_double2ull_rz(double __a);\n"
3205"__device__ unsigned long long __nv_double_as_longlong(double __a);\n"
3206"__device__ double __nv_drcp_rd(double __a);\n"
3207"__device__ double __nv_drcp_rn(double __a);\n"
3208"__device__ double __nv_drcp_ru(double __a);\n"
3209"__device__ double __nv_drcp_rz(double __a);\n"
3210"__device__ double __nv_dsqrt_rd(double __a);\n"
3211"__device__ double __nv_dsqrt_rn(double __a);\n"
3212"__device__ double __nv_dsqrt_ru(double __a);\n"
3213"__device__ double __nv_dsqrt_rz(double __a);\n"
3214"__device__ double __nv_dsub_rd(double __a, double __b);\n"
3215"__device__ double __nv_dsub_rn(double __a, double __b);\n"
3216"__device__ double __nv_dsub_ru(double __a, double __b);\n"
3217"__device__ double __nv_dsub_rz(double __a, double __b);\n"
3218"__device__ double __nv_erfc(double __a);\n"
3219"__device__ float __nv_erfcf(float __a);\n"
3220"__device__ double __nv_erfcinv(double __a);\n"
3221"__device__ float __nv_erfcinvf(float __a);\n"
3222"__device__ double __nv_erfcx(double __a);\n"
3223"__device__ float __nv_erfcxf(float __a);\n"
3224"__device__ double __nv_erf(double __a);\n"
3225"__device__ float __nv_erff(float __a);\n"
3226"__device__ double __nv_erfinv(double __a);\n"
3227"__device__ float __nv_erfinvf(float __a);\n"
3228"__device__ double __nv_exp10(double __a);\n"
3229"__device__ float __nv_exp10f(float __a);\n"
3230"__device__ double __nv_exp2(double __a);\n"
3231"__device__ float __nv_exp2f(float __a);\n"
3232"__device__ double __nv_exp(double __a);\n"
3233"__device__ float __nv_expf(float __a);\n"
3234"__device__ double __nv_expm1(double __a);\n"
3235"__device__ float __nv_expm1f(float __a);\n"
3236"__device__ double __nv_fabs(double __a);\n"
3237"__device__ float __nv_fabsf(float __a);\n"
3238"__device__ float __nv_fadd_rd(float __a, float __b);\n"
3239"__device__ float __nv_fadd_rn(float __a, float __b);\n"
3240"__device__ float __nv_fadd_ru(float __a, float __b);\n"
3241"__device__ float __nv_fadd_rz(float __a, float __b);\n"
3242"__device__ float __nv_fast_cosf(float __a);\n"
3243"__device__ float __nv_fast_exp10f(float __a);\n"
3244"__device__ float __nv_fast_expf(float __a);\n"
3245"__device__ float __nv_fast_fdividef(float __a, float __b);\n"
3246"__device__ float __nv_fast_log10f(float __a);\n"
3247"__device__ float __nv_fast_log2f(float __a);\n"
3248"__device__ float __nv_fast_logf(float __a);\n"
3249"__device__ float __nv_fast_powf(float __a, float __b);\n"
3250"__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);\n"
3251"__device__ float __nv_fast_sinf(float __a);\n"
3252"__device__ float __nv_fast_tanf(float __a);\n"
3253"__device__ double __nv_fdim(double __a, double __b);\n"
3254"__device__ float __nv_fdimf(float __a, float __b);\n"
3255"__device__ float __nv_fdiv_rd(float __a, float __b);\n"
3256"__device__ float __nv_fdiv_rn(float __a, float __b);\n"
3257"__device__ float __nv_fdiv_ru(float __a, float __b);\n"
3258"__device__ float __nv_fdiv_rz(float __a, float __b);\n"
3259"__device__ int __nv_ffs(int __a);\n"
3260"__device__ int __nv_ffsll(long long __a);\n"
3261"__device__ int __nv_finitef(float __a);\n"
3262"__device__ unsigned short __nv_float2half_rn(float __a);\n"
3263"__device__ int __nv_float2int_rd(float __a);\n"
3264"__device__ int __nv_float2int_rn(float __a);\n"
3265"__device__ int __nv_float2int_ru(float __a);\n"
3266"__device__ int __nv_float2int_rz(float __a);\n"
3267"__device__ long long __nv_float2ll_rd(float __a);\n"
3268"__device__ long long __nv_float2ll_rn(float __a);\n"
3269"__device__ long long __nv_float2ll_ru(float __a);\n"
3270"__device__ long long __nv_float2ll_rz(float __a);\n"
3271"__device__ unsigned int __nv_float2uint_rd(float __a);\n"
3272"__device__ unsigned int __nv_float2uint_rn(float __a);\n"
3273"__device__ unsigned int __nv_float2uint_ru(float __a);\n"
3274"__device__ unsigned int __nv_float2uint_rz(float __a);\n"
3275"__device__ unsigned long long __nv_float2ull_rd(float __a);\n"
3276"__device__ unsigned long long __nv_float2ull_rn(float __a);\n"
3277"__device__ unsigned long long __nv_float2ull_ru(float __a);\n"
3278"__device__ unsigned long long __nv_float2ull_rz(float __a);\n"
3279"__device__ int __nv_float_as_int(float __a);\n"
3280"__device__ unsigned int __nv_float_as_uint(float __a);\n"
3281"__device__ double __nv_floor(double __a);\n"
3282"__device__ float __nv_floorf(float __a);\n"
3283"__device__ double __nv_fma(double __a, double __b, double __c);\n"
3284"__device__ float __nv_fmaf(float __a, float __b, float __c);\n"
3285"__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);\n"
3286"__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);\n"
3287"__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);\n"
3288"__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);\n"
3289"__device__ float __nv_fmaf_rd(float __a, float __b, float __c);\n"
3290"__device__ float __nv_fmaf_rn(float __a, float __b, float __c);\n"
3291"__device__ float __nv_fmaf_ru(float __a, float __b, float __c);\n"
3292"__device__ float __nv_fmaf_rz(float __a, float __b, float __c);\n"
3293"__device__ double __nv_fma_rd(double __a, double __b, double __c);\n"
3294"__device__ double __nv_fma_rn(double __a, double __b, double __c);\n"
3295"__device__ double __nv_fma_ru(double __a, double __b, double __c);\n"
3296"__device__ double __nv_fma_rz(double __a, double __b, double __c);\n"
3297"__device__ double __nv_fmax(double __a, double __b);\n"
3298"__device__ float __nv_fmaxf(float __a, float __b);\n"
3299"__device__ double __nv_fmin(double __a, double __b);\n"
3300"__device__ float __nv_fminf(float __a, float __b);\n"
3301"__device__ double __nv_fmod(double __a, double __b);\n"
3302"__device__ float __nv_fmodf(float __a, float __b);\n"
3303"__device__ float __nv_fmul_rd(float __a, float __b);\n"
3304"__device__ float __nv_fmul_rn(float __a, float __b);\n"
3305"__device__ float __nv_fmul_ru(float __a, float __b);\n"
3306"__device__ float __nv_fmul_rz(float __a, float __b);\n"
3307"__device__ float __nv_frcp_rd(float __a);\n"
3308"__device__ float __nv_frcp_rn(float __a);\n"
3309"__device__ float __nv_frcp_ru(float __a);\n"
3310"__device__ float __nv_frcp_rz(float __a);\n"
3311"__device__ double __nv_frexp(double __a, int *__b);\n"
3312"__device__ float __nv_frexpf(float __a, int *__b);\n"
3313"__device__ float __nv_frsqrt_rn(float __a);\n"
3314"__device__ float __nv_fsqrt_rd(float __a);\n"
3315"__device__ float __nv_fsqrt_rn(float __a);\n"
3316"__device__ float __nv_fsqrt_ru(float __a);\n"
3317"__device__ float __nv_fsqrt_rz(float __a);\n"
3318"__device__ float __nv_fsub_rd(float __a, float __b);\n"
3319"__device__ float __nv_fsub_rn(float __a, float __b);\n"
3320"__device__ float __nv_fsub_ru(float __a, float __b);\n"
3321"__device__ float __nv_fsub_rz(float __a, float __b);\n"
3322"__device__ int __nv_hadd(int __a, int __b);\n"
3323"__device__ float __nv_half2float(unsigned short __h);\n"
3324"__device__ double __nv_hiloint2double(int __a, int __b);\n"
3325"__device__ double __nv_hypot(double __a, double __b);\n"
3326"__device__ float __nv_hypotf(float __a, float __b);\n"
3327"__device__ int __nv_ilogb(double __a);\n"
3328"__device__ int __nv_ilogbf(float __a);\n"
3329"__device__ double __nv_int2double_rn(int __a);\n"
3330"__device__ float __nv_int2float_rd(int __a);\n"
3331"__device__ float __nv_int2float_rn(int __a);\n"
3332"__device__ float __nv_int2float_ru(int __a);\n"
3333"__device__ float __nv_int2float_rz(int __a);\n"
3334"__device__ float __nv_int_as_float(int __a);\n"
3335"__device__ int __nv_isfinited(double __a);\n"
3336"__device__ int __nv_isinfd(double __a);\n"
3337"__device__ int __nv_isinff(float __a);\n"
3338"__device__ int __nv_isnand(double __a);\n"
3339"__device__ int __nv_isnanf(float __a);\n"
3340"__device__ double __nv_j0(double __a);\n"
3341"__device__ float __nv_j0f(float __a);\n"
3342"__device__ double __nv_j1(double __a);\n"
3343"__device__ float __nv_j1f(float __a);\n"
3344"__device__ float __nv_jnf(int __a, float __b);\n"
3345"__device__ double __nv_jn(int __a, double __b);\n"
3346"__device__ double __nv_ldexp(double __a, int __b);\n"
3347"__device__ float __nv_ldexpf(float __a, int __b);\n"
3348"__device__ double __nv_lgamma(double __a);\n"
3349"__device__ float __nv_lgammaf(float __a);\n"
3350"__device__ double __nv_ll2double_rd(long long __a);\n"
3351"__device__ double __nv_ll2double_rn(long long __a);\n"
3352"__device__ double __nv_ll2double_ru(long long __a);\n"
3353"__device__ double __nv_ll2double_rz(long long __a);\n"
3354"__device__ float __nv_ll2float_rd(long long __a);\n"
3355"__device__ float __nv_ll2float_rn(long long __a);\n"
3356"__device__ float __nv_ll2float_ru(long long __a);\n"
3357"__device__ float __nv_ll2float_rz(long long __a);\n"
3358"__device__ long long __nv_llabs(long long __a);\n"
3359"__device__ long long __nv_llmax(long long __a, long long __b);\n"
3360"__device__ long long __nv_llmin(long long __a, long long __b);\n"
3361"__device__ long long __nv_llrint(double __a);\n"
3362"__device__ long long __nv_llrintf(float __a);\n"
3363"__device__ long long __nv_llround(double __a);\n"
3364"__device__ long long __nv_llroundf(float __a);\n"
3365"__device__ double __nv_log10(double __a);\n"
3366"__device__ float __nv_log10f(float __a);\n"
3367"__device__ double __nv_log1p(double __a);\n"
3368"__device__ float __nv_log1pf(float __a);\n"
3369"__device__ double __nv_log2(double __a);\n"
3370"__device__ float __nv_log2f(float __a);\n"
3371"__device__ double __nv_logb(double __a);\n"
3372"__device__ float __nv_logbf(float __a);\n"
3373"__device__ double __nv_log(double __a);\n"
3374"__device__ float __nv_logf(float __a);\n"
3375"__device__ double __nv_longlong_as_double(long long __a);\n"
3376"__device__ int __nv_max(int __a, int __b);\n"
3377"__device__ int __nv_min(int __a, int __b);\n"
3378"__device__ double __nv_modf(double __a, double *__b);\n"
3379"__device__ float __nv_modff(float __a, float *__b);\n"
3380"__device__ int __nv_mul24(int __a, int __b);\n"
3381"__device__ long long __nv_mul64hi(long long __a, long long __b);\n"
3382"__device__ int __nv_mulhi(int __a, int __b);\n"
3383"__device__ double __nv_nan(const signed char *__a);\n"
3384"__device__ float __nv_nanf(const signed char *__a);\n"
3385"__device__ double __nv_nearbyint(double __a);\n"
3386"__device__ float __nv_nearbyintf(float __a);\n"
3387"__device__ double __nv_nextafter(double __a, double __b);\n"
3388"__device__ float __nv_nextafterf(float __a, float __b);\n"
3389"__device__ double __nv_norm3d(double __a, double __b, double __c);\n"
3390"__device__ float __nv_norm3df(float __a, float __b, float __c);\n"
3391"__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);\n"
3392"__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);\n"
3393"__device__ double __nv_normcdf(double __a);\n"
3394"__device__ float __nv_normcdff(float __a);\n"
3395"__device__ double __nv_normcdfinv(double __a);\n"
3396"__device__ float __nv_normcdfinvf(float __a);\n"
3397"__device__ float __nv_normf(int __a, const float *__b);\n"
3398"__device__ double __nv_norm(int __a, const double *__b);\n"
3399"__device__ int __nv_popc(int __a);\n"
3400"__device__ int __nv_popcll(long long __a);\n"
3401"__device__ double __nv_pow(double __a, double __b);\n"
3402"__device__ float __nv_powf(float __a, float __b);\n"
3403"__device__ double __nv_powi(double __a, int __b);\n"
3404"__device__ float __nv_powif(float __a, int __b);\n"
3405"__device__ double __nv_rcbrt(double __a);\n"
3406"__device__ float __nv_rcbrtf(float __a);\n"
3407"__device__ double __nv_rcp64h(double __a);\n"
3408"__device__ double __nv_remainder(double __a, double __b);\n"
3409"__device__ float __nv_remainderf(float __a, float __b);\n"
3410"__device__ double __nv_remquo(double __a, double __b, int *__c);\n"
3411"__device__ float __nv_remquof(float __a, float __b, int *__c);\n"
3412"__device__ int __nv_rhadd(int __a, int __b);\n"
3413"__device__ double __nv_rhypot(double __a, double __b);\n"
3414"__device__ float __nv_rhypotf(float __a, float __b);\n"
3415"__device__ double __nv_rint(double __a);\n"
3416"__device__ float __nv_rintf(float __a);\n"
3417"__device__ double __nv_rnorm3d(double __a, double __b, double __c);\n"
3418"__device__ float __nv_rnorm3df(float __a, float __b, float __c);\n"
3419"__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);\n"
3420"__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);\n"
3421"__device__ float __nv_rnormf(int __a, const float *__b);\n"
3422"__device__ double __nv_rnorm(int __a, const double *__b);\n"
3423"__device__ double __nv_round(double __a);\n"
3424"__device__ float __nv_roundf(float __a);\n"
3425"__device__ double __nv_rsqrt(double __a);\n"
3426"__device__ float __nv_rsqrtf(float __a);\n"
3427"__device__ int __nv_sad(int __a, int __b, int __c);\n"
3428"__device__ float __nv_saturatef(float __a);\n"
3429"__device__ double __nv_scalbn(double __a, int __b);\n"
3430"__device__ float __nv_scalbnf(float __a, int __b);\n"
3431"__device__ int __nv_signbitd(double __a);\n"
3432"__device__ int __nv_signbitf(float __a);\n"
3433"__device__ void __nv_sincos(double __a, double *__b, double *__c);\n"
3434"__device__ void __nv_sincosf(float __a, float *__b, float *__c);\n"
3435"__device__ void __nv_sincospi(double __a, double *__b, double *__c);\n"
3436"__device__ void __nv_sincospif(float __a, float *__b, float *__c);\n"
3437"__device__ double __nv_sin(double __a);\n"
3438"__device__ float __nv_sinf(float __a);\n"
3439"__device__ double __nv_sinh(double __a);\n"
3440"__device__ float __nv_sinhf(float __a);\n"
3441"__device__ double __nv_sinpi(double __a);\n"
3442"__device__ float __nv_sinpif(float __a);\n"
3443"__device__ double __nv_sqrt(double __a);\n"
3444"__device__ float __nv_sqrtf(float __a);\n"
3445"__device__ double __nv_tan(double __a);\n"
3446"__device__ float __nv_tanf(float __a);\n"
3447"__device__ double __nv_tanh(double __a);\n"
3448"__device__ float __nv_tanhf(float __a);\n"
3449"__device__ double __nv_tgamma(double __a);\n"
3450"__device__ float __nv_tgammaf(float __a);\n"
3451"__device__ double __nv_trunc(double __a);\n"
3452"__device__ float __nv_truncf(float __a);\n"
3453"__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);\n"
3454"__device__ double __nv_uint2double_rn(unsigned int __i);\n"
3455"__device__ float __nv_uint2float_rd(unsigned int __a);\n"
3456"__device__ float __nv_uint2float_rn(unsigned int __a);\n"
3457"__device__ float __nv_uint2float_ru(unsigned int __a);\n"
3458"__device__ float __nv_uint2float_rz(unsigned int __a);\n"
3459"__device__ float __nv_uint_as_float(unsigned int __a);\n"
3460"__device__ double __nv_ull2double_rd(unsigned long long __a);\n"
3461"__device__ double __nv_ull2double_rn(unsigned long long __a);\n"
3462"__device__ double __nv_ull2double_ru(unsigned long long __a);\n"
3463"__device__ double __nv_ull2double_rz(unsigned long long __a);\n"
3464"__device__ float __nv_ull2float_rd(unsigned long long __a);\n"
3465"__device__ float __nv_ull2float_rn(unsigned long long __a);\n"
3466"__device__ float __nv_ull2float_ru(unsigned long long __a);\n"
3467"__device__ float __nv_ull2float_rz(unsigned long long __a);\n"
3468"__device__ unsigned long long __nv_ullmax(unsigned long long __a,\n"
3469" unsigned long long __b);\n"
3470"__device__ unsigned long long __nv_ullmin(unsigned long long __a,\n"
3471" unsigned long long __b);\n"
3472"__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);\n"
3473"__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);\n"
3474"__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);\n"
3475"__device__ unsigned long long __nv_umul64hi(unsigned long long __a,\n"
3476" unsigned long long __b);\n"
3477"__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);\n"
3478"__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);\n"
3479"__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,\n"
3480" unsigned int __c);\n"
3481"#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n"
3482"__device__ int __nv_vabs2(int __a);\n"
3483"__device__ int __nv_vabs4(int __a);\n"
3484"__device__ int __nv_vabsdiffs2(int __a, int __b);\n"
3485"__device__ int __nv_vabsdiffs4(int __a, int __b);\n"
3486"__device__ int __nv_vabsdiffu2(int __a, int __b);\n"
3487"__device__ int __nv_vabsdiffu4(int __a, int __b);\n"
3488"__device__ int __nv_vabsss2(int __a);\n"
3489"__device__ int __nv_vabsss4(int __a);\n"
3490"__device__ int __nv_vadd2(int __a, int __b);\n"
3491"__device__ int __nv_vadd4(int __a, int __b);\n"
3492"__device__ int __nv_vaddss2(int __a, int __b);\n"
3493"__device__ int __nv_vaddss4(int __a, int __b);\n"
3494"__device__ int __nv_vaddus2(int __a, int __b);\n"
3495"__device__ int __nv_vaddus4(int __a, int __b);\n"
3496"__device__ int __nv_vavgs2(int __a, int __b);\n"
3497"__device__ int __nv_vavgs4(int __a, int __b);\n"
3498"__device__ int __nv_vavgu2(int __a, int __b);\n"
3499"__device__ int __nv_vavgu4(int __a, int __b);\n"
3500"__device__ int __nv_vcmpeq2(int __a, int __b);\n"
3501"__device__ int __nv_vcmpeq4(int __a, int __b);\n"
3502"__device__ int __nv_vcmpges2(int __a, int __b);\n"
3503"__device__ int __nv_vcmpges4(int __a, int __b);\n"
3504"__device__ int __nv_vcmpgeu2(int __a, int __b);\n"
3505"__device__ int __nv_vcmpgeu4(int __a, int __b);\n"
3506"__device__ int __nv_vcmpgts2(int __a, int __b);\n"
3507"__device__ int __nv_vcmpgts4(int __a, int __b);\n"
3508"__device__ int __nv_vcmpgtu2(int __a, int __b);\n"
3509"__device__ int __nv_vcmpgtu4(int __a, int __b);\n"
3510"__device__ int __nv_vcmples2(int __a, int __b);\n"
3511"__device__ int __nv_vcmples4(int __a, int __b);\n"
3512"__device__ int __nv_vcmpleu2(int __a, int __b);\n"
3513"__device__ int __nv_vcmpleu4(int __a, int __b);\n"
3514"__device__ int __nv_vcmplts2(int __a, int __b);\n"
3515"__device__ int __nv_vcmplts4(int __a, int __b);\n"
3516"__device__ int __nv_vcmpltu2(int __a, int __b);\n"
3517"__device__ int __nv_vcmpltu4(int __a, int __b);\n"
3518"__device__ int __nv_vcmpne2(int __a, int __b);\n"
3519"__device__ int __nv_vcmpne4(int __a, int __b);\n"
3520"__device__ int __nv_vhaddu2(int __a, int __b);\n"
3521"__device__ int __nv_vhaddu4(int __a, int __b);\n"
3522"__device__ int __nv_vmaxs2(int __a, int __b);\n"
3523"__device__ int __nv_vmaxs4(int __a, int __b);\n"
3524"__device__ int __nv_vmaxu2(int __a, int __b);\n"
3525"__device__ int __nv_vmaxu4(int __a, int __b);\n"
3526"__device__ int __nv_vmins2(int __a, int __b);\n"
3527"__device__ int __nv_vmins4(int __a, int __b);\n"
3528"__device__ int __nv_vminu2(int __a, int __b);\n"
3529"__device__ int __nv_vminu4(int __a, int __b);\n"
3530"__device__ int __nv_vneg2(int __a);\n"
3531"__device__ int __nv_vneg4(int __a);\n"
3532"__device__ int __nv_vnegss2(int __a);\n"
3533"__device__ int __nv_vnegss4(int __a);\n"
3534"__device__ int __nv_vsads2(int __a, int __b);\n"
3535"__device__ int __nv_vsads4(int __a, int __b);\n"
3536"__device__ int __nv_vsadu2(int __a, int __b);\n"
3537"__device__ int __nv_vsadu4(int __a, int __b);\n"
3538"__device__ int __nv_vseteq2(int __a, int __b);\n"
3539"__device__ int __nv_vseteq4(int __a, int __b);\n"
3540"__device__ int __nv_vsetges2(int __a, int __b);\n"
3541"__device__ int __nv_vsetges4(int __a, int __b);\n"
3542"__device__ int __nv_vsetgeu2(int __a, int __b);\n"
3543"__device__ int __nv_vsetgeu4(int __a, int __b);\n"
3544"__device__ int __nv_vsetgts2(int __a, int __b);\n"
3545"__device__ int __nv_vsetgts4(int __a, int __b);\n"
3546"__device__ int __nv_vsetgtu2(int __a, int __b);\n"
3547"__device__ int __nv_vsetgtu4(int __a, int __b);\n"
3548"__device__ int __nv_vsetles2(int __a, int __b);\n"
3549"__device__ int __nv_vsetles4(int __a, int __b);\n"
3550"__device__ int __nv_vsetleu2(int __a, int __b);\n"
3551"__device__ int __nv_vsetleu4(int __a, int __b);\n"
3552"__device__ int __nv_vsetlts2(int __a, int __b);\n"
3553"__device__ int __nv_vsetlts4(int __a, int __b);\n"
3554"__device__ int __nv_vsetltu2(int __a, int __b);\n"
3555"__device__ int __nv_vsetltu4(int __a, int __b);\n"
3556"__device__ int __nv_vsetne2(int __a, int __b);\n"
3557"__device__ int __nv_vsetne4(int __a, int __b);\n"
3558"__device__ int __nv_vsub2(int __a, int __b);\n"
3559"__device__ int __nv_vsub4(int __a, int __b);\n"
3560"__device__ int __nv_vsubss2(int __a, int __b);\n"
3561"__device__ int __nv_vsubss4(int __a, int __b);\n"
3562"__device__ int __nv_vsubus2(int __a, int __b);\n"
3563"__device__ int __nv_vsubus4(int __a, int __b);\n"
3564"#endif // CUDA_VERSION\n"
3565"__device__ double __nv_y0(double __a);\n"
3566"__device__ float __nv_y0f(float __a);\n"
3567"__device__ double __nv_y1(double __a);\n"
3568"__device__ float __nv_y1f(float __a);\n"
3569"__device__ float __nv_ynf(int __a, float __b);\n"
3570"__device__ double __nv_yn(int __a, double __b);\n"
3571"} // extern \"C\"\n"
3572"#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n"
3573"" } ,
3574 { "/builtins/__clang_cuda_math_forward_declares.h" , "/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===\n"
3575" *\n"
3576" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3577" * of this software and associated documentation files (the \"Software\"), to deal\n"
3578" * in the Software without restriction, including without limitation the rights\n"
3579" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3580" * copies of the Software, and to permit persons to whom the Software is\n"
3581" * furnished to do so, subject to the following conditions:\n"
3582" *\n"
3583" * The above copyright notice and this permission notice shall be included in\n"
3584" * all copies or substantial portions of the Software.\n"
3585" *\n"
3586" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3587" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3588" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3589" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3590" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3591" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3592" * THE SOFTWARE.\n"
3593" *\n"
3594" *===-----------------------------------------------------------------------===\n"
3595" */\n"
3596"#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3597"#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n"
3598"#ifndef __CUDA__\n"
3599"#error \"This file is for CUDA compilation only.\"\n"
3600"#endif\n"
3601"\n"
3602"// This file forward-declares of some math functions we (or the CUDA headers)\n"
3603"// will define later. We need to do this, and do it before cmath is included,\n"
3604"// because the standard library may have constexpr math functions. In the\n"
3605"// absence of a prior __device__ decl, those constexpr functions may become\n"
3606"// implicitly host+device. host+device functions can't be overloaded, so that\n"
3607"// would preclude the use of our own __device__ overloads for these functions.\n"
3608"\n"
3609"#pragma push_macro(\"__DEVICE__\")\n"
3610"#define __DEVICE__ \\\n"
3611" static __inline__ __attribute__((always_inline)) __attribute__((device))\n"
3612"\n"
3613"__DEVICE__ double abs(double);\n"
3614"__DEVICE__ float abs(float);\n"
3615"__DEVICE__ int abs(int);\n"
3616"__DEVICE__ long abs(long);\n"
3617"__DEVICE__ long long abs(long long);\n"
3618"__DEVICE__ double acos(double);\n"
3619"__DEVICE__ float acos(float);\n"
3620"__DEVICE__ double acosh(double);\n"
3621"__DEVICE__ float acosh(float);\n"
3622"__DEVICE__ double asin(double);\n"
3623"__DEVICE__ float asin(float);\n"
3624"__DEVICE__ double asinh(double);\n"
3625"__DEVICE__ float asinh(float);\n"
3626"__DEVICE__ double atan2(double, double);\n"
3627"__DEVICE__ float atan2(float, float);\n"
3628"__DEVICE__ double atan(double);\n"
3629"__DEVICE__ float atan(float);\n"
3630"__DEVICE__ double atanh(double);\n"
3631"__DEVICE__ float atanh(float);\n"
3632"__DEVICE__ double cbrt(double);\n"
3633"__DEVICE__ float cbrt(float);\n"
3634"__DEVICE__ double ceil(double);\n"
3635"__DEVICE__ float ceil(float);\n"
3636"__DEVICE__ double copysign(double, double);\n"
3637"__DEVICE__ float copysign(float, float);\n"
3638"__DEVICE__ double cos(double);\n"
3639"__DEVICE__ float cos(float);\n"
3640"__DEVICE__ double cosh(double);\n"
3641"__DEVICE__ float cosh(float);\n"
3642"__DEVICE__ double erfc(double);\n"
3643"__DEVICE__ float erfc(float);\n"
3644"__DEVICE__ double erf(double);\n"
3645"__DEVICE__ float erf(float);\n"
3646"__DEVICE__ double exp2(double);\n"
3647"__DEVICE__ float exp2(float);\n"
3648"__DEVICE__ double exp(double);\n"
3649"__DEVICE__ float exp(float);\n"
3650"__DEVICE__ double expm1(double);\n"
3651"__DEVICE__ float expm1(float);\n"
3652"__DEVICE__ double fabs(double);\n"
3653"__DEVICE__ float fabs(float);\n"
3654"__DEVICE__ double fdim(double, double);\n"
3655"__DEVICE__ float fdim(float, float);\n"
3656"__DEVICE__ double floor(double);\n"
3657"__DEVICE__ float floor(float);\n"
3658"__DEVICE__ double fma(double, double, double);\n"
3659"__DEVICE__ float fma(float, float, float);\n"
3660"__DEVICE__ double fmax(double, double);\n"
3661"__DEVICE__ float fmax(float, float);\n"
3662"__DEVICE__ double fmin(double, double);\n"
3663"__DEVICE__ float fmin(float, float);\n"
3664"__DEVICE__ double fmod(double, double);\n"
3665"__DEVICE__ float fmod(float, float);\n"
3666"__DEVICE__ int fpclassify(double);\n"
3667"__DEVICE__ int fpclassify(float);\n"
3668"__DEVICE__ double frexp(double, int *);\n"
3669"__DEVICE__ float frexp(float, int *);\n"
3670"__DEVICE__ double hypot(double, double);\n"
3671"__DEVICE__ float hypot(float, float);\n"
3672"__DEVICE__ int ilogb(double);\n"
3673"__DEVICE__ int ilogb(float);\n"
3674"__DEVICE__ bool isfinite(double);\n"
3675"__DEVICE__ bool isfinite(float);\n"
3676"__DEVICE__ bool isgreater(double, double);\n"
3677"__DEVICE__ bool isgreaterequal(double, double);\n"
3678"__DEVICE__ bool isgreaterequal(float, float);\n"
3679"__DEVICE__ bool isgreater(float, float);\n"
3680"__DEVICE__ bool isinf(double);\n"
3681"__DEVICE__ bool isinf(float);\n"
3682"__DEVICE__ bool isless(double, double);\n"
3683"__DEVICE__ bool islessequal(double, double);\n"
3684"__DEVICE__ bool islessequal(float, float);\n"
3685"__DEVICE__ bool isless(float, float);\n"
3686"__DEVICE__ bool islessgreater(double, double);\n"
3687"__DEVICE__ bool islessgreater(float, float);\n"
3688"__DEVICE__ bool isnan(double);\n"
3689"__DEVICE__ bool isnan(float);\n"
3690"__DEVICE__ bool isnormal(double);\n"
3691"__DEVICE__ bool isnormal(float);\n"
3692"__DEVICE__ bool isunordered(double, double);\n"
3693"__DEVICE__ bool isunordered(float, float);\n"
3694"__DEVICE__ long labs(long);\n"
3695"__DEVICE__ double ldexp(double, int);\n"
3696"__DEVICE__ float ldexp(float, int);\n"
3697"__DEVICE__ double lgamma(double);\n"
3698"__DEVICE__ float lgamma(float);\n"
3699"__DEVICE__ long long llabs(long long);\n"
3700"__DEVICE__ long long llrint(double);\n"
3701"__DEVICE__ long long llrint(float);\n"
3702"__DEVICE__ double log10(double);\n"
3703"__DEVICE__ float log10(float);\n"
3704"__DEVICE__ double log1p(double);\n"
3705"__DEVICE__ float log1p(float);\n"
3706"__DEVICE__ double log2(double);\n"
3707"__DEVICE__ float log2(float);\n"
3708"__DEVICE__ double logb(double);\n"
3709"__DEVICE__ float logb(float);\n"
3710"__DEVICE__ double log(double);\n"
3711"__DEVICE__ float log(float);\n"
3712"__DEVICE__ long lrint(double);\n"
3713"__DEVICE__ long lrint(float);\n"
3714"__DEVICE__ long lround(double);\n"
3715"__DEVICE__ long lround(float);\n"
3716"__DEVICE__ long long llround(float); // No llround(double).\n"
3717"__DEVICE__ double modf(double, double *);\n"
3718"__DEVICE__ float modf(float, float *);\n"
3719"__DEVICE__ double nan(const char *);\n"
3720"__DEVICE__ float nanf(const char *);\n"
3721"__DEVICE__ double nearbyint(double);\n"
3722"__DEVICE__ float nearbyint(float);\n"
3723"__DEVICE__ double nextafter(double, double);\n"
3724"__DEVICE__ float nextafter(float, float);\n"
3725"__DEVICE__ double pow(double, double);\n"
3726"__DEVICE__ double pow(double, int);\n"
3727"__DEVICE__ float pow(float, float);\n"
3728"__DEVICE__ float pow(float, int);\n"
3729"__DEVICE__ double remainder(double, double);\n"
3730"__DEVICE__ float remainder(float, float);\n"
3731"__DEVICE__ double remquo(double, double, int *);\n"
3732"__DEVICE__ float remquo(float, float, int *);\n"
3733"__DEVICE__ double rint(double);\n"
3734"__DEVICE__ float rint(float);\n"
3735"__DEVICE__ double round(double);\n"
3736"__DEVICE__ float round(float);\n"
3737"__DEVICE__ double scalbln(double, long);\n"
3738"__DEVICE__ float scalbln(float, long);\n"
3739"__DEVICE__ double scalbn(double, int);\n"
3740"__DEVICE__ float scalbn(float, int);\n"
3741"__DEVICE__ bool signbit(double);\n"
3742"__DEVICE__ bool signbit(float);\n"
3743"__DEVICE__ double sin(double);\n"
3744"__DEVICE__ float sin(float);\n"
3745"__DEVICE__ double sinh(double);\n"
3746"__DEVICE__ float sinh(float);\n"
3747"__DEVICE__ double sqrt(double);\n"
3748"__DEVICE__ float sqrt(float);\n"
3749"__DEVICE__ double tan(double);\n"
3750"__DEVICE__ float tan(float);\n"
3751"__DEVICE__ double tanh(double);\n"
3752"__DEVICE__ float tanh(float);\n"
3753"__DEVICE__ double tgamma(double);\n"
3754"__DEVICE__ float tgamma(float);\n"
3755"__DEVICE__ double trunc(double);\n"
3756"__DEVICE__ float trunc(float);\n"
3757"\n"
3758"// Notably missing above is nexttoward, which we don't define on\n"
3759"// the device side because libdevice doesn't give us an implementation, and we\n"
3760"// don't want to be in the business of writing one ourselves.\n"
3761"\n"
3762"// We need to define these overloads in exactly the namespace our standard\n"
3763"// library uses (including the right inline namespace), otherwise they won't be\n"
3764"// picked up by other functions in the standard library (e.g. functions in\n"
3765"// <complex>). Thus the ugliness below.\n"
3766"#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n"
3767"_LIBCPP_BEGIN_NAMESPACE_STD\n"
3768"#else\n"
3769"namespace std {\n"
3770"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3771"_GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3772"#endif\n"
3773"#endif\n"
3774"\n"
3775"using ::abs;\n"
3776"using ::acos;\n"
3777"using ::acosh;\n"
3778"using ::asin;\n"
3779"using ::asinh;\n"
3780"using ::atan;\n"
3781"using ::atan2;\n"
3782"using ::atanh;\n"
3783"using ::cbrt;\n"
3784"using ::ceil;\n"
3785"using ::copysign;\n"
3786"using ::cos;\n"
3787"using ::cosh;\n"
3788"using ::erf;\n"
3789"using ::erfc;\n"
3790"using ::exp;\n"
3791"using ::exp2;\n"
3792"using ::expm1;\n"
3793"using ::fabs;\n"
3794"using ::fdim;\n"
3795"using ::floor;\n"
3796"using ::fma;\n"
3797"using ::fmax;\n"
3798"using ::fmin;\n"
3799"using ::fmod;\n"
3800"using ::fpclassify;\n"
3801"using ::frexp;\n"
3802"using ::hypot;\n"
3803"using ::ilogb;\n"
3804"using ::isfinite;\n"
3805"using ::isgreater;\n"
3806"using ::isgreaterequal;\n"
3807"using ::isinf;\n"
3808"using ::isless;\n"
3809"using ::islessequal;\n"
3810"using ::islessgreater;\n"
3811"using ::isnan;\n"
3812"using ::isnormal;\n"
3813"using ::isunordered;\n"
3814"using ::labs;\n"
3815"using ::ldexp;\n"
3816"using ::lgamma;\n"
3817"using ::llabs;\n"
3818"using ::llrint;\n"
3819"using ::log;\n"
3820"using ::log10;\n"
3821"using ::log1p;\n"
3822"using ::log2;\n"
3823"using ::logb;\n"
3824"using ::lrint;\n"
3825"using ::lround;\n"
3826"using ::llround;\n"
3827"using ::modf;\n"
3828"using ::nan;\n"
3829"using ::nanf;\n"
3830"using ::nearbyint;\n"
3831"using ::nextafter;\n"
3832"using ::pow;\n"
3833"using ::remainder;\n"
3834"using ::remquo;\n"
3835"using ::rint;\n"
3836"using ::round;\n"
3837"using ::scalbln;\n"
3838"using ::scalbn;\n"
3839"using ::signbit;\n"
3840"using ::sin;\n"
3841"using ::sinh;\n"
3842"using ::sqrt;\n"
3843"using ::tan;\n"
3844"using ::tanh;\n"
3845"using ::tgamma;\n"
3846"using ::trunc;\n"
3847"\n"
3848"#ifdef _LIBCPP_END_NAMESPACE_STD\n"
3849"_LIBCPP_END_NAMESPACE_STD\n"
3850"#else\n"
3851"#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n"
3852"_GLIBCXX_END_NAMESPACE_VERSION\n"
3853"#endif\n"
3854"} // namespace std\n"
3855"#endif\n"
3856"\n"
3857"#pragma pop_macro(\"__DEVICE__\")\n"
3858"\n"
3859"#endif\n"
3860"" } ,
3861 { "/builtins/__clang_cuda_runtime_wrapper.h" , "/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===\n"
3862" *\n"
3863" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
3864" * of this software and associated documentation files (the \"Software\"), to deal\n"
3865" * in the Software without restriction, including without limitation the rights\n"
3866" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
3867" * copies of the Software, and to permit persons to whom the Software is\n"
3868" * furnished to do so, subject to the following conditions:\n"
3869" *\n"
3870" * The above copyright notice and this permission notice shall be included in\n"
3871" * all copies or substantial portions of the Software.\n"
3872" *\n"
3873" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
3874" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
3875" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
3876" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
3877" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
3878" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
3879" * THE SOFTWARE.\n"
3880" *\n"
3881" *===-----------------------------------------------------------------------===\n"
3882" */\n"
3883"\n"
3884"/*\n"
3885" * WARNING: This header is intended to be directly -include'd by\n"
3886" * the compiler and is not supposed to be included by users.\n"
3887" *\n"
3888" * CUDA headers are implemented in a way that currently makes it\n"
3889" * impossible for user code to #include directly when compiling with\n"
3890" * Clang. They present different view of CUDA-supplied functions\n"
3891" * depending on where in NVCC's compilation pipeline the headers are\n"
3892" * included. Neither of these modes provides function definitions with\n"
3893" * correct attributes, so we use preprocessor to force the headers\n"
3894" * into a form that Clang can use.\n"
3895" *\n"
3896" * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's\n"
3897" * this file during every CUDA compilation.\n"
3898" */\n"
3899"\n"
3900"#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3901"#define __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
3902"\n"
3903"#if defined(__CUDA__) && defined(__clang__)\n"
3904"\n"
3905"// Include some forward declares that must come before cmath.\n"
3906"#include <__clang_cuda_math_forward_declares.h>\n"
3907"\n"
3908"// Include some standard headers to avoid CUDA headers including them\n"
3909"// while some required macros (like __THROW) are in a weird state.\n"
3910"#include <cmath>\n"
3911"#include <cstdlib>\n"
3912"#include <stdlib.h>\n"
3913"\n"
3914"// Preserve common macros that will be changed below by us or by CUDA\n"
3915"// headers.\n"
3916"#pragma push_macro(\"__THROW\")\n"
3917"#pragma push_macro(\"__CUDA_ARCH__\")\n"
3918"\n"
3919"// WARNING: Preprocessor hacks below are based on specific details of\n"
3920"// CUDA-7.x headers and are not expected to work with any other\n"
3921"// version of CUDA headers.\n"
3922"#include \"cuda.h\"\n"
3923"#if !defined(CUDA_VERSION)\n"
3924"#error \"cuda.h did not define CUDA_VERSION\"\n"
3925"#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020\n"
3926"#error \"Unsupported CUDA version!\"\n"
3927"#endif\n"
3928"\n"
3929"// Make largest subset of device functions available during host\n"
3930"// compilation -- SM_35 for the time being.\n"
3931"#ifndef __CUDA_ARCH__\n"
3932"#define __CUDA_ARCH__ 350\n"
3933"#endif\n"
3934"\n"
3935"#include \"__clang_cuda_builtin_vars.h\"\n"
3936"\n"
3937"// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above\n"
3938"// has taken care of builtin variables declared in the file.\n"
3939"#define __DEVICE_LAUNCH_PARAMETERS_H__\n"
3940"\n"
3941"// {math,device}_functions.h only have declarations of the\n"
3942"// functions. We don't need them as we're going to pull in their\n"
3943"// definitions from .hpp files.\n"
3944"#define __DEVICE_FUNCTIONS_H__\n"
3945"#define __MATH_FUNCTIONS_H__\n"
3946"#define __COMMON_FUNCTIONS_H__\n"
3947"// device_functions_decls is replaced by __clang_cuda_device_functions.h\n"
3948"// included below.\n"
3949"#define __DEVICE_FUNCTIONS_DECLS_H__\n"
3950"\n"
3951"#undef __CUDACC__\n"
3952"#if CUDA_VERSION < 9000\n"
3953"#define __CUDABE__\n"
3954"#else\n"
3955"#define __CUDA_LIBDEVICE__\n"
3956"#endif\n"
3957"// Disables definitions of device-side runtime support stubs in\n"
3958"// cuda_device_runtime_api.h\n"
3959"#include \"driver_types.h\"\n"
3960"#include \"host_config.h\"\n"
3961"#include \"host_defines.h\"\n"
3962"\n"
3963"// Temporarily replace \"nv_weak\" with weak, so __attribute__((nv_weak)) in\n"
3964"// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the\n"
3965"// functional equivalent of what we need.\n"
3966"#pragma push_macro(\"nv_weak\")\n"
3967"#define nv_weak weak\n"
3968"#undef __CUDABE__\n"
3969"#undef __CUDA_LIBDEVICE__\n"
3970"#define __CUDACC__\n"
3971"#include \"cuda_runtime.h\"\n"
3972"\n"
3973"#pragma pop_macro(\"nv_weak\")\n"
3974"#undef __CUDACC__\n"
3975"#define __CUDABE__\n"
3976"\n"
3977"// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does\n"
3978"// not have at the moment. Emulate them with a builtin memcpy/memset.\n"
3979"#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)\n"
3980"#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)\n"
3981"\n"
3982"#if CUDA_VERSION < 9000\n"
3983"#include \"crt/device_runtime.h\"\n"
3984"#endif\n"
3985"#include \"crt/host_runtime.h\"\n"
3986"// device_runtime.h defines __cxa_* macros that will conflict with\n"
3987"// cxxabi.h.\n"
3988"// FIXME: redefine these as __device__ functions.\n"
3989"#undef __cxa_vec_ctor\n"
3990"#undef __cxa_vec_cctor\n"
3991"#undef __cxa_vec_dtor\n"
3992"#undef __cxa_vec_new\n"
3993"#undef __cxa_vec_new2\n"
3994"#undef __cxa_vec_new3\n"
3995"#undef __cxa_vec_delete2\n"
3996"#undef __cxa_vec_delete\n"
3997"#undef __cxa_vec_delete3\n"
3998"#undef __cxa_pure_virtual\n"
3999"\n"
4000"// math_functions.hpp expects this host function be defined on MacOS, but it\n"
4001"// ends up not being there because of the games we play here. Just define it\n"
4002"// ourselves; it's simple enough.\n"
4003"#ifdef __APPLE__\n"
4004"inline __host__ double __signbitd(double x) {\n"
4005" return std::signbit(x);\n"
4006"}\n"
4007"#endif\n"
4008"\n"
4009"// CUDA 9.1 no longer provides declarations for libdevice functions, so we need\n"
4010"// to provide our own.\n"
4011"#include <__clang_cuda_libdevice_declares.h>\n"
4012"\n"
4013"// Wrappers for many device-side standard library functions became compiler\n"
4014"// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now\n"
4015"// provides its own implementation of the wrappers.\n"
4016"#if CUDA_VERSION >= 9000\n"
4017"#include <__clang_cuda_device_functions.h>\n"
4018"#endif\n"
4019"\n"
4020"// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's\n"
4021"// counterpart does not do it, so we need to make it empty here to keep\n"
4022"// following CUDA includes happy.\n"
4023"#undef __THROW\n"
4024"#define __THROW\n"
4025"\n"
4026"// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.\n"
4027"// Previous versions used to check whether they are defined or not.\n"
4028"// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it\n"
4029"// here to detect the switch.\n"
4030"\n"
4031"#if defined(CU_DEVICE_INVALID)\n"
4032"#if !defined(__USE_FAST_MATH__)\n"
4033"#define __USE_FAST_MATH__ 0\n"
4034"#endif\n"
4035"\n"
4036"#if !defined(__CUDA_PREC_DIV)\n"
4037"#define __CUDA_PREC_DIV 0\n"
4038"#endif\n"
4039"#endif\n"
4040"\n"
4041"// Temporarily poison __host__ macro to ensure it's not used by any of\n"
4042"// the headers we're about to include.\n"
4043"#pragma push_macro(\"__host__\")\n"
4044"#define __host__ UNEXPECTED_HOST_ATTRIBUTE\n"
4045"\n"
4046"// device_functions.hpp and math_functions*.hpp use 'static\n"
4047"// __forceinline__' (with no __device__) for definitions of device\n"
4048"// functions. Temporarily redefine __forceinline__ to include\n"
4049"// __device__.\n"
4050"#pragma push_macro(\"__forceinline__\")\n"
4051"#define __forceinline__ __device__ __inline__ __attribute__((always_inline))\n"
4052"#if CUDA_VERSION < 9000\n"
4053"#include \"device_functions.hpp\"\n"
4054"#endif\n"
4055"\n"
4056"// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we\n"
4057"// get the slow-but-accurate or fast-but-inaccurate versions of functions like\n"
4058"// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.\n"
4059"//\n"
4060"// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.\n"
4061"// slow divides), so we need to scope our define carefully here.\n"
4062"#pragma push_macro(\"__USE_FAST_MATH__\")\n"
4063"#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n"
4064"#define __USE_FAST_MATH__ 1\n"
4065"#endif\n"
4066"\n"
4067"#if CUDA_VERSION >= 9000\n"
4068"// CUDA-9.2 needs host-side memcpy for some host functions in\n"
4069"// device_functions.hpp\n"
4070"#if CUDA_VERSION >= 9020\n"
4071"#include <string.h>\n"
4072"#endif\n"
4073"#include \"crt/math_functions.hpp\"\n"
4074"#else\n"
4075"#include \"math_functions.hpp\"\n"
4076"#endif\n"
4077"\n"
4078"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4079"\n"
4080"#if CUDA_VERSION < 9000\n"
4081"#include \"math_functions_dbl_ptx3.hpp\"\n"
4082"#endif\n"
4083"#pragma pop_macro(\"__forceinline__\")\n"
4084"\n"
4085"// Pull in host-only functions that are only available when neither\n"
4086"// __CUDACC__ nor __CUDABE__ are defined.\n"
4087"#undef __MATH_FUNCTIONS_HPP__\n"
4088"#undef __CUDABE__\n"
4089"#if CUDA_VERSION < 9000\n"
4090"#include \"math_functions.hpp\"\n"
4091"#endif\n"
4092"// Alas, additional overloads for these functions are hard to get to.\n"
4093"// Considering that we only need these overloads for a few functions,\n"
4094"// we can provide them here.\n"
4095"static inline float rsqrt(float __a) { return rsqrtf(__a); }\n"
4096"static inline float rcbrt(float __a) { return rcbrtf(__a); }\n"
4097"static inline float sinpi(float __a) { return sinpif(__a); }\n"
4098"static inline float cospi(float __a) { return cospif(__a); }\n"
4099"static inline void sincospi(float __a, float *__b, float *__c) {\n"
4100" return sincospif(__a, __b, __c);\n"
4101"}\n"
4102"static inline float erfcinv(float __a) { return erfcinvf(__a); }\n"
4103"static inline float normcdfinv(float __a) { return normcdfinvf(__a); }\n"
4104"static inline float normcdf(float __a) { return normcdff(__a); }\n"
4105"static inline float erfcx(float __a) { return erfcxf(__a); }\n"
4106"\n"
4107"#if CUDA_VERSION < 9000\n"
4108"// For some reason single-argument variant is not always declared by\n"
4109"// CUDA headers. Alas, device_functions.hpp included below needs it.\n"
4110"static inline __device__ void __brkpt(int __c) { __brkpt(); }\n"
4111"#endif\n"
4112"\n"
4113"// Now include *.hpp with definitions of various GPU functions. Alas,\n"
4114"// a lot of thins get declared/defined with __host__ attribute which\n"
4115"// we don't want and we have to define it out. We also have to include\n"
4116"// {device,math}_functions.hpp again in order to extract the other\n"
4117"// branch of #if/else inside.\n"
4118"#define __host__\n"
4119"#undef __CUDABE__\n"
4120"#define __CUDACC__\n"
4121"#if CUDA_VERSION >= 9000\n"
4122"// Some atomic functions became compiler builtins in CUDA-9 , so we need their\n"
4123"// declarations.\n"
4124"#include \"device_atomic_functions.h\"\n"
4125"#endif\n"
4126"#undef __DEVICE_FUNCTIONS_HPP__\n"
4127"#include \"device_atomic_functions.hpp\"\n"
4128"#if CUDA_VERSION >= 9000\n"
4129"#include \"crt/device_functions.hpp\"\n"
4130"#include \"crt/device_double_functions.hpp\"\n"
4131"#else\n"
4132"#include \"device_functions.hpp\"\n"
4133"#define __CUDABE__\n"
4134"#include \"device_double_functions.h\"\n"
4135"#undef __CUDABE__\n"
4136"#endif\n"
4137"#include \"sm_20_atomic_functions.hpp\"\n"
4138"#include \"sm_20_intrinsics.hpp\"\n"
4139"#include \"sm_32_atomic_functions.hpp\"\n"
4140"\n"
4141"// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the\n"
4142"// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to\n"
4143"// define them using builtins so that the optimizer can reason about and across\n"
4144"// these instructions. In particular, using intrinsics for ldg gets us the\n"
4145"// [addr+imm] addressing mode, which, although it doesn't actually exist in the\n"
4146"// hardware, seems to generate faster machine code because ptxas can more easily\n"
4147"// reason about our code.\n"
4148"\n"
4149"#if CUDA_VERSION >= 8000\n"
4150"#pragma push_macro(\"__CUDA_ARCH__\")\n"
4151"#undef __CUDA_ARCH__\n"
4152"#include \"sm_60_atomic_functions.hpp\"\n"
4153"#include \"sm_61_intrinsics.hpp\"\n"
4154"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4155"#endif\n"
4156"\n"
4157"#undef __MATH_FUNCTIONS_HPP__\n"
4158"\n"
4159"// math_functions.hpp defines ::signbit as a __host__ __device__ function. This\n"
4160"// conflicts with libstdc++'s constexpr ::signbit, so we have to rename\n"
4161"// math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's\n"
4162"// conditional on __GNUC__. :)\n"
4163"#pragma push_macro(\"signbit\")\n"
4164"#pragma push_macro(\"__GNUC__\")\n"
4165"#undef __GNUC__\n"
4166"#define signbit __ignored_cuda_signbit\n"
4167"\n"
4168"// CUDA-9 omits device-side definitions of some math functions if it sees\n"
4169"// include guard from math.h wrapper from libstdc++. We have to undo the header\n"
4170"// guard temporarily to get the definitions we need.\n"
4171"#pragma push_macro(\"_GLIBCXX_MATH_H\")\n"
4172"#pragma push_macro(\"_LIBCPP_VERSION\")\n"
4173"#if CUDA_VERSION >= 9000\n"
4174"#undef _GLIBCXX_MATH_H\n"
4175"// We also need to undo another guard that checks for libc++ 3.8+\n"
4176"#ifdef _LIBCPP_VERSION\n"
4177"#define _LIBCPP_VERSION 3700\n"
4178"#endif\n"
4179"#endif\n"
4180"\n"
4181"#if CUDA_VERSION >= 9000\n"
4182"#include \"crt/math_functions.hpp\"\n"
4183"#else\n"
4184"#include \"math_functions.hpp\"\n"
4185"#endif\n"
4186"#pragma pop_macro(\"_GLIBCXX_MATH_H\")\n"
4187"#pragma pop_macro(\"_LIBCPP_VERSION\")\n"
4188"#pragma pop_macro(\"__GNUC__\")\n"
4189"#pragma pop_macro(\"signbit\")\n"
4190"\n"
4191"#pragma pop_macro(\"__host__\")\n"
4192"\n"
4193"#include \"texture_indirect_functions.h\"\n"
4194"\n"
4195"// Restore state of __CUDA_ARCH__ and __THROW we had on entry.\n"
4196"#pragma pop_macro(\"__CUDA_ARCH__\")\n"
4197"#pragma pop_macro(\"__THROW\")\n"
4198"\n"
4199"// Set up compiler macros expected to be seen during compilation.\n"
4200"#undef __CUDABE__\n"
4201"#define __CUDACC__\n"
4202"\n"
4203"extern \"C\" {\n"
4204"// Device-side CUDA system calls.\n"
4205"// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls\n"
4206"// We need these declarations and wrappers for device-side\n"
4207"// malloc/free/printf calls to work without relying on\n"
4208"// -fcuda-disable-target-call-checks option.\n"
4209"__device__ int vprintf(const char *, const char *);\n"
4210"__device__ void free(void *) __attribute((nothrow));\n"
4211"__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));\n"
4212"__device__ void __assertfail(const char *__message, const char *__file,\n"
4213" unsigned __line, const char *__function,\n"
4214" size_t __charSize) __attribute__((noreturn));\n"
4215"\n"
4216"// In order for standard assert() macro on linux to work we need to\n"
4217"// provide device-side __assert_fail()\n"
4218"__device__ static inline void __assert_fail(const char *__message,\n"
4219" const char *__file, unsigned __line,\n"
4220" const char *__function) {\n"
4221" __assertfail(__message, __file, __line, __function, sizeof(char));\n"
4222"}\n"
4223"\n"
4224"// Clang will convert printf into vprintf, but we still need\n"
4225"// device-side declaration for it.\n"
4226"__device__ int printf(const char *, ...);\n"
4227"} // extern \"C\"\n"
4228"\n"
4229"// We also need device-side std::malloc and std::free.\n"
4230"namespace std {\n"
4231"__device__ static inline void free(void *__ptr) { ::free(__ptr); }\n"
4232"__device__ static inline void *malloc(size_t __size) {\n"
4233" return ::malloc(__size);\n"
4234"}\n"
4235"} // namespace std\n"
4236"\n"
4237"// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to\n"
4238"// come after we've pulled in the definition of uint3 and dim3.\n"
4239"\n"
4240"__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {\n"
4241" uint3 ret;\n"
4242" ret.x = x;\n"
4243" ret.y = y;\n"
4244" ret.z = z;\n"
4245" return ret;\n"
4246"}\n"
4247"\n"
4248"__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {\n"
4249" uint3 ret;\n"
4250" ret.x = x;\n"
4251" ret.y = y;\n"
4252" ret.z = z;\n"
4253" return ret;\n"
4254"}\n"
4255"\n"
4256"__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {\n"
4257" return dim3(x, y, z);\n"
4258"}\n"
4259"\n"
4260"__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {\n"
4261" return dim3(x, y, z);\n"
4262"}\n"
4263"\n"
4264"#include <__clang_cuda_cmath.h>\n"
4265"#include <__clang_cuda_intrinsics.h>\n"
4266"#include <__clang_cuda_complex_builtins.h>\n"
4267"\n"
4268"// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host\n"
4269"// mode, giving them their \"proper\" types of dim3 and uint3. This is\n"
4270"// incompatible with the types we give in __clang_cuda_builtin_vars.h. As as\n"
4271"// hack, force-include the header (nvcc doesn't include it by default) but\n"
4272"// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are\n"
4273"// only used here for the redeclarations of blockDim and threadIdx.)\n"
4274"#pragma push_macro(\"dim3\")\n"
4275"#pragma push_macro(\"uint3\")\n"
4276"#define dim3 __cuda_builtin_blockDim_t\n"
4277"#define uint3 __cuda_builtin_threadIdx_t\n"
4278"#include \"curand_mtgp32_kernel.h\"\n"
4279"#pragma pop_macro(\"dim3\")\n"
4280"#pragma pop_macro(\"uint3\")\n"
4281"#pragma pop_macro(\"__USE_FAST_MATH__\")\n"
4282"\n"
4283"#endif // __CUDA__\n"
4284"#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__\n"
4285"" } ,
4286 { "/builtins/__stddef_max_align_t.h" , "/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===\n"
4287" *\n"
4288" * Copyright (c) 2014 Chandler Carruth\n"
4289" *\n"
4290" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4291" * of this software and associated documentation files (the \"Software\"), to deal\n"
4292" * in the Software without restriction, including without limitation the rights\n"
4293" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4294" * copies of the Software, and to permit persons to whom the Software is\n"
4295" * furnished to do so, subject to the following conditions:\n"
4296" *\n"
4297" * The above copyright notice and this permission notice shall be included in\n"
4298" * all copies or substantial portions of the Software.\n"
4299" *\n"
4300" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4301" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4302" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4303" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4304" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4305" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4306" * THE SOFTWARE.\n"
4307" *\n"
4308" *===-----------------------------------------------------------------------===\n"
4309" */\n"
4310"\n"
4311"#ifndef __CLANG_MAX_ALIGN_T_DEFINED\n"
4312"#define __CLANG_MAX_ALIGN_T_DEFINED\n"
4313"\n"
4314"#if defined(_MSC_VER)\n"
4315"typedef double max_align_t;\n"
4316"#elif defined(__APPLE__)\n"
4317"typedef long double max_align_t;\n"
4318"#else\n"
4319"// Define 'max_align_t' to match the GCC definition.\n"
4320"typedef struct {\n"
4321" long long __clang_max_align_nonce1\n"
4322" __attribute__((__aligned__(__alignof__(long long))));\n"
4323" long double __clang_max_align_nonce2\n"
4324" __attribute__((__aligned__(__alignof__(long double))));\n"
4325"} max_align_t;\n"
4326"#endif\n"
4327"\n"
4328"#endif\n"
4329"" } ,
4330 { "/builtins/__wmmintrin_aes.h" , "/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===\n"
4331" *\n"
4332" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4333" * of this software and associated documentation files (the \"Software\"), to deal\n"
4334" * in the Software without restriction, including without limitation the rights\n"
4335" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4336" * copies of the Software, and to permit persons to whom the Software is\n"
4337" * furnished to do so, subject to the following conditions:\n"
4338" *\n"
4339" * The above copyright notice and this permission notice shall be included in\n"
4340" * all copies or substantial portions of the Software.\n"
4341" *\n"
4342" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4343" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4344" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4345" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4346" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4347" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4348" * THE SOFTWARE.\n"
4349" *\n"
4350" *===-----------------------------------------------------------------------===\n"
4351" */\n"
4352"\n"
4353"#ifndef __WMMINTRIN_H\n"
4354"#error \"Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead.\"\n"
4355"#endif\n"
4356"\n"
4357"#ifndef __WMMINTRIN_AES_H\n"
4358"#define __WMMINTRIN_AES_H\n"
4359"\n"
4360"/* Define the default attributes for the functions in this file. */\n"
4361"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"aes\"), __min_vector_width__(128)))\n"
4362"\n"
4363"/// Performs a single round of AES encryption using the Equivalent\n"
4364"/// Inverse Cipher, transforming the state value from the first source\n"
4365"/// operand using a 128-bit round key value contained in the second source\n"
4366"/// operand, and writes the result to the destination.\n"
4367"///\n"
4368"/// \\headerfile <x86intrin.h>\n"
4369"///\n"
4370"/// This intrinsic corresponds to the <c> VAESENC </c> instruction.\n"
4371"///\n"
4372"/// \\param __V\n"
4373"/// A 128-bit integer vector containing the state value.\n"
4374"/// \\param __R\n"
4375"/// A 128-bit integer vector containing the round key value.\n"
4376"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4377"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4378"_mm_aesenc_si128(__m128i __V, __m128i __R)\n"
4379"{\n"
4380" return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);\n"
4381"}\n"
4382"\n"
4383"/// Performs the final round of AES encryption using the Equivalent\n"
4384"/// Inverse Cipher, transforming the state value from the first source\n"
4385"/// operand using a 128-bit round key value contained in the second source\n"
4386"/// operand, and writes the result to the destination.\n"
4387"///\n"
4388"/// \\headerfile <x86intrin.h>\n"
4389"///\n"
4390"/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.\n"
4391"///\n"
4392"/// \\param __V\n"
4393"/// A 128-bit integer vector containing the state value.\n"
4394"/// \\param __R\n"
4395"/// A 128-bit integer vector containing the round key value.\n"
4396"/// \\returns A 128-bit integer vector containing the encrypted value.\n"
4397"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4398"_mm_aesenclast_si128(__m128i __V, __m128i __R)\n"
4399"{\n"
4400" return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);\n"
4401"}\n"
4402"\n"
4403"/// Performs a single round of AES decryption using the Equivalent\n"
4404"/// Inverse Cipher, transforming the state value from the first source\n"
4405"/// operand using a 128-bit round key value contained in the second source\n"
4406"/// operand, and writes the result to the destination.\n"
4407"///\n"
4408"/// \\headerfile <x86intrin.h>\n"
4409"///\n"
4410"/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.\n"
4411"///\n"
4412"/// \\param __V\n"
4413"/// A 128-bit integer vector containing the state value.\n"
4414"/// \\param __R\n"
4415"/// A 128-bit integer vector containing the round key value.\n"
4416"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4417"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4418"_mm_aesdec_si128(__m128i __V, __m128i __R)\n"
4419"{\n"
4420" return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);\n"
4421"}\n"
4422"\n"
4423"/// Performs the final round of AES decryption using the Equivalent\n"
4424"/// Inverse Cipher, transforming the state value from the first source\n"
4425"/// operand using a 128-bit round key value contained in the second source\n"
4426"/// operand, and writes the result to the destination.\n"
4427"///\n"
4428"/// \\headerfile <x86intrin.h>\n"
4429"///\n"
4430"/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.\n"
4431"///\n"
4432"/// \\param __V\n"
4433"/// A 128-bit integer vector containing the state value.\n"
4434"/// \\param __R\n"
4435"/// A 128-bit integer vector containing the round key value.\n"
4436"/// \\returns A 128-bit integer vector containing the decrypted value.\n"
4437"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4438"_mm_aesdeclast_si128(__m128i __V, __m128i __R)\n"
4439"{\n"
4440" return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);\n"
4441"}\n"
4442"\n"
4443"/// Applies the AES InvMixColumns() transformation to an expanded key\n"
4444"/// contained in the source operand, and writes the result to the\n"
4445"/// destination.\n"
4446"///\n"
4447"/// \\headerfile <x86intrin.h>\n"
4448"///\n"
4449"/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.\n"
4450"///\n"
4451"/// \\param __V\n"
4452"/// A 128-bit integer vector containing the expanded key.\n"
4453"/// \\returns A 128-bit integer vector containing the transformed value.\n"
4454"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4455"_mm_aesimc_si128(__m128i __V)\n"
4456"{\n"
4457" return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);\n"
4458"}\n"
4459"\n"
4460"/// Generates a round key for AES encryption, operating on 128-bit data\n"
4461"/// specified in the first source operand and using an 8-bit round constant\n"
4462"/// specified by the second source operand, and writes the result to the\n"
4463"/// destination.\n"
4464"///\n"
4465"/// \\headerfile <x86intrin.h>\n"
4466"///\n"
4467"/// \\code\n"
4468"/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);\n"
4469"/// \\endcode\n"
4470"///\n"
4471"/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.\n"
4472"///\n"
4473"/// \\param C\n"
4474"/// A 128-bit integer vector that is used to generate the AES encryption key.\n"
4475"/// \\param R\n"
4476"/// An 8-bit round constant used to generate the AES encryption key.\n"
4477"/// \\returns A 128-bit round key for AES encryption.\n"
4478"#define _mm_aeskeygenassist_si128(C, R) \\\n"
4479" (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))\n"
4480"\n"
4481"#undef __DEFAULT_FN_ATTRS\n"
4482"\n"
4483"#endif /* __WMMINTRIN_AES_H */\n"
4484"" } ,
4485 { "/builtins/__wmmintrin_pclmul.h" , "/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===\n"
4486" *\n"
4487" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4488" * of this software and associated documentation files (the \"Software\"), to deal\n"
4489" * in the Software without restriction, including without limitation the rights\n"
4490" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4491" * copies of the Software, and to permit persons to whom the Software is\n"
4492" * furnished to do so, subject to the following conditions:\n"
4493" *\n"
4494" * The above copyright notice and this permission notice shall be included in\n"
4495" * all copies or substantial portions of the Software.\n"
4496" *\n"
4497" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4498" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4499" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4500" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4501" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4502" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4503" * THE SOFTWARE.\n"
4504" *\n"
4505" *===-----------------------------------------------------------------------===\n"
4506" */\n"
4507"\n"
4508"#ifndef __WMMINTRIN_H\n"
4509"#error \"Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead.\"\n"
4510"#endif\n"
4511"\n"
4512"#ifndef __WMMINTRIN_PCLMUL_H\n"
4513"#define __WMMINTRIN_PCLMUL_H\n"
4514"\n"
4515"/// Multiplies two 64-bit integer values, which are selected from source\n"
4516"/// operands using the immediate-value operand. The multiplication is a\n"
4517"/// carry-less multiplication, and the 128-bit integer product is stored in\n"
4518"/// the destination.\n"
4519"///\n"
4520"/// \\headerfile <x86intrin.h>\n"
4521"///\n"
4522"/// \\code\n"
4523"/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);\n"
4524"/// \\endcode\n"
4525"///\n"
4526"/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.\n"
4527"///\n"
4528"/// \\param __X\n"
4529"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4530"/// \\param __Y\n"
4531"/// A 128-bit vector of [2 x i64] containing one of the source operands.\n"
4532"/// \\param __I\n"
4533"/// An immediate value specifying which 64-bit values to select from the\n"
4534"/// operands. Bit 0 is used to select a value from operand \\a __X, and bit\n"
4535"/// 4 is used to select a value from operand \\a __Y: \\n\n"
4536"/// Bit[0]=0 indicates that bits[63:0] of operand \\a __X are used. \\n\n"
4537"/// Bit[0]=1 indicates that bits[127:64] of operand \\a __X are used. \\n\n"
4538"/// Bit[4]=0 indicates that bits[63:0] of operand \\a __Y are used. \\n\n"
4539"/// Bit[4]=1 indicates that bits[127:64] of operand \\a __Y are used.\n"
4540"/// \\returns The 128-bit integer vector containing the result of the carry-less\n"
4541"/// multiplication of the selected 64-bit values.\n"
4542"#define _mm_clmulepi64_si128(X, Y, I) \\\n"
4543" ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \\\n"
4544" (__v2di)(__m128i)(Y), (char)(I)))\n"
4545"\n"
4546"#endif /* __WMMINTRIN_PCLMUL_H */\n"
4547"" } ,
4548 { "/builtins/adxintrin.h" , "/*===---- adxintrin.h - ADX intrinsics -------------------------------------===\n"
4549" *\n"
4550" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4551" * of this software and associated documentation files (the \"Software\"), to deal\n"
4552" * in the Software without restriction, including without limitation the rights\n"
4553" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4554" * copies of the Software, and to permit persons to whom the Software is\n"
4555" * furnished to do so, subject to the following conditions:\n"
4556" *\n"
4557" * The above copyright notice and this permission notice shall be included in\n"
4558" * all copies or substantial portions of the Software.\n"
4559" *\n"
4560" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4561" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4562" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4563" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4564" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4565" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4566" * THE SOFTWARE.\n"
4567" *\n"
4568" *===-----------------------------------------------------------------------===\n"
4569" */\n"
4570"\n"
4571"#ifndef __IMMINTRIN_H\n"
4572"#error \"Never use <adxintrin.h> directly; include <immintrin.h> instead.\"\n"
4573"#endif\n"
4574"\n"
4575"#ifndef __ADXINTRIN_H\n"
4576"#define __ADXINTRIN_H\n"
4577"\n"
4578"/* Define the default attributes for the functions in this file. */\n"
4579"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
4580"\n"
4581"/* Intrinsics that are available only if __ADX__ defined */\n"
4582"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4583"_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4584" unsigned int *__p)\n"
4585"{\n"
4586" return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n"
4587"}\n"
4588"\n"
4589"#ifdef __x86_64__\n"
4590"static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n"
4591"_addcarryx_u64(unsigned char __cf, unsigned long long __x,\n"
4592" unsigned long long __y, unsigned long long *__p)\n"
4593"{\n"
4594" return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n"
4595"}\n"
4596"#endif\n"
4597"\n"
4598"/* Intrinsics that are also available if __ADX__ undefined */\n"
4599"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4600"_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4601" unsigned int *__p)\n"
4602"{\n"
4603" return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);\n"
4604"}\n"
4605"\n"
4606"#ifdef __x86_64__\n"
4607"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4608"_addcarry_u64(unsigned char __cf, unsigned long long __x,\n"
4609" unsigned long long __y, unsigned long long *__p)\n"
4610"{\n"
4611" return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);\n"
4612"}\n"
4613"#endif\n"
4614"\n"
4615"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4616"_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n"
4617" unsigned int *__p)\n"
4618"{\n"
4619" return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);\n"
4620"}\n"
4621"\n"
4622"#ifdef __x86_64__\n"
4623"static __inline unsigned char __DEFAULT_FN_ATTRS\n"
4624"_subborrow_u64(unsigned char __cf, unsigned long long __x,\n"
4625" unsigned long long __y, unsigned long long *__p)\n"
4626"{\n"
4627" return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);\n"
4628"}\n"
4629"#endif\n"
4630"\n"
4631"#undef __DEFAULT_FN_ATTRS\n"
4632"\n"
4633"#endif /* __ADXINTRIN_H */\n"
4634"" } ,
4635 { "/builtins/ammintrin.h" , "/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===\n"
4636" *\n"
4637" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4638" * of this software and associated documentation files (the \"Software\"), to deal\n"
4639" * in the Software without restriction, including without limitation the rights\n"
4640" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4641" * copies of the Software, and to permit persons to whom the Software is\n"
4642" * furnished to do so, subject to the following conditions:\n"
4643" *\n"
4644" * The above copyright notice and this permission notice shall be included in\n"
4645" * all copies or substantial portions of the Software.\n"
4646" *\n"
4647" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4648" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4649" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4650" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4651" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4652" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4653" * THE SOFTWARE.\n"
4654" *\n"
4655" *===-----------------------------------------------------------------------===\n"
4656" */\n"
4657"\n"
4658"#ifndef __AMMINTRIN_H\n"
4659"#define __AMMINTRIN_H\n"
4660"\n"
4661"#include <pmmintrin.h>\n"
4662"\n"
4663"/* Define the default attributes for the functions in this file. */\n"
4664"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4a\"), __min_vector_width__(128)))\n"
4665"\n"
4666"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4667"/// integer vector operand at the index \\a idx and of the length \\a len.\n"
4668"///\n"
4669"/// \\headerfile <x86intrin.h>\n"
4670"///\n"
4671"/// \\code\n"
4672"/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);\n"
4673"/// \\endcode\n"
4674"///\n"
4675"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4676"///\n"
4677"/// \\param x\n"
4678"/// The value from which bits are extracted.\n"
4679"/// \\param len\n"
4680"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4681"/// are zero, the length is interpreted as 64.\n"
4682"/// \\param idx\n"
4683"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4684"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4685"/// the result is undefined. If the length and index are both zero, bits\n"
4686"/// [63:0] of parameter \\a x are extracted. If the length is zero but the\n"
4687"/// index is non-zero, the result is undefined.\n"
4688"/// \\returns A 128-bit integer vector whose lower 64 bits contain the bits\n"
4689"/// extracted from the source operand.\n"
4690"#define _mm_extracti_si64(x, len, idx) \\\n"
4691" ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \\\n"
4692" (char)(len), (char)(idx)))\n"
4693"\n"
4694"/// Extracts the specified bits from the lower 64 bits of the 128-bit\n"
4695"/// integer vector operand at the index and of the length specified by\n"
4696"/// \\a __y.\n"
4697"///\n"
4698"/// \\headerfile <x86intrin.h>\n"
4699"///\n"
4700"/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n"
4701"///\n"
4702"/// \\param __x\n"
4703"/// The value from which bits are extracted.\n"
4704"/// \\param __y\n"
4705"/// Specifies the index of the least significant bit at [13:8] and the\n"
4706"/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the\n"
4707"/// length is interpreted as 64. If the sum of the index and length is\n"
4708"/// greater than 64, the result is undefined. If the length and index are\n"
4709"/// both zero, bits [63:0] of parameter \\a __x are extracted. If the length\n"
4710"/// is zero but the index is non-zero, the result is undefined.\n"
4711"/// \\returns A 128-bit vector whose lower 64 bits contain the bits extracted\n"
4712"/// from the source operand.\n"
4713"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4714"_mm_extract_si64(__m128i __x, __m128i __y)\n"
4715"{\n"
4716" return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);\n"
4717"}\n"
4718"\n"
4719"/// Inserts bits of a specified length from the source integer vector\n"
4720"/// \\a y into the lower 64 bits of the destination integer vector \\a x at\n"
4721"/// the index \\a idx and of the length \\a len.\n"
4722"///\n"
4723"/// \\headerfile <x86intrin.h>\n"
4724"///\n"
4725"/// \\code\n"
4726"/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,\n"
4727"/// const int idx);\n"
4728"/// \\endcode\n"
4729"///\n"
4730"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4731"///\n"
4732"/// \\param x\n"
4733"/// The destination operand where bits will be inserted. The inserted bits\n"
4734"/// are defined by the length \\a len and by the index \\a idx specifying the\n"
4735"/// least significant bit.\n"
4736"/// \\param y\n"
4737"/// The source operand containing the bits to be extracted. The extracted\n"
4738"/// bits are the least significant bits of operand \\a y of length \\a len.\n"
4739"/// \\param len\n"
4740"/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n"
4741"/// are zero, the length is interpreted as 64.\n"
4742"/// \\param idx\n"
4743"/// Bits [5:0] specify the index of the least significant bit; the other\n"
4744"/// bits are ignored. If the sum of the index and length is greater than 64,\n"
4745"/// the result is undefined. If the length and index are both zero, bits\n"
4746"/// [63:0] of parameter \\a y are inserted into parameter \\a x. If the length\n"
4747"/// is zero but the index is non-zero, the result is undefined.\n"
4748"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4749"/// destination operand \\a x with the specified bitfields replaced by the\n"
4750"/// lower bits of source operand \\a y. The upper 64 bits of the return value\n"
4751"/// are undefined.\n"
4752"#define _mm_inserti_si64(x, y, len, idx) \\\n"
4753" ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \\\n"
4754" (__v2di)(__m128i)(y), \\\n"
4755" (char)(len), (char)(idx)))\n"
4756"\n"
4757"/// Inserts bits of a specified length from the source integer vector\n"
4758"/// \\a __y into the lower 64 bits of the destination integer vector \\a __x\n"
4759"/// at the index and of the length specified by \\a __y.\n"
4760"///\n"
4761"/// \\headerfile <x86intrin.h>\n"
4762"///\n"
4763"/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n"
4764"///\n"
4765"/// \\param __x\n"
4766"/// The destination operand where bits will be inserted. The inserted bits\n"
4767"/// are defined by the length and by the index of the least significant bit\n"
4768"/// specified by operand \\a __y.\n"
4769"/// \\param __y\n"
4770"/// The source operand containing the bits to be extracted. The extracted\n"
4771"/// bits are the least significant bits of operand \\a __y with length\n"
4772"/// specified by bits [69:64]. These are inserted into the destination at the\n"
4773"/// index specified by bits [77:72]; all other bits are ignored. If bits\n"
4774"/// [69:64] are zero, the length is interpreted as 64. If the sum of the\n"
4775"/// index and length is greater than 64, the result is undefined. If the\n"
4776"/// length and index are both zero, bits [63:0] of parameter \\a __y are\n"
4777"/// inserted into parameter \\a __x. If the length is zero but the index is\n"
4778"/// non-zero, the result is undefined.\n"
4779"/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n"
4780"/// destination operand \\a __x with the specified bitfields replaced by the\n"
4781"/// lower bits of source operand \\a __y. The upper 64 bits of the return\n"
4782"/// value are undefined.\n"
4783"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
4784"_mm_insert_si64(__m128i __x, __m128i __y)\n"
4785"{\n"
4786" return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);\n"
4787"}\n"
4788"\n"
4789"/// Stores a 64-bit double-precision value in a 64-bit memory location.\n"
4790"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
4791"/// used again soon).\n"
4792"///\n"
4793"/// \\headerfile <x86intrin.h>\n"
4794"///\n"
4795"/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.\n"
4796"///\n"
4797"/// \\param __p\n"
4798"/// The 64-bit memory location used to store the register value.\n"
4799"/// \\param __a\n"
4800"/// The 64-bit double-precision floating-point register value to be stored.\n"
4801"static __inline__ void __DEFAULT_FN_ATTRS\n"
4802"_mm_stream_sd(double *__p, __m128d __a)\n"
4803"{\n"
4804" __builtin_ia32_movntsd(__p, (__v2df)__a);\n"
4805"}\n"
4806"\n"
4807"/// Stores a 32-bit single-precision floating-point value in a 32-bit\n"
4808"/// memory location. To minimize caching, the data is flagged as\n"
4809"/// non-temporal (unlikely to be used again soon).\n"
4810"///\n"
4811"/// \\headerfile <x86intrin.h>\n"
4812"///\n"
4813"/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.\n"
4814"///\n"
4815"/// \\param __p\n"
4816"/// The 32-bit memory location used to store the register value.\n"
4817"/// \\param __a\n"
4818"/// The 32-bit single-precision floating-point register value to be stored.\n"
4819"static __inline__ void __DEFAULT_FN_ATTRS\n"
4820"_mm_stream_ss(float *__p, __m128 __a)\n"
4821"{\n"
4822" __builtin_ia32_movntss(__p, (__v4sf)__a);\n"
4823"}\n"
4824"\n"
4825"#undef __DEFAULT_FN_ATTRS\n"
4826"\n"
4827"#endif /* __AMMINTRIN_H */\n"
4828"" } ,
4829 { "/builtins/arm64intr.h" , "/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===\n"
4830" *\n"
4831" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4832" * of this software and associated documentation files (the \"Software\"), to deal\n"
4833" * in the Software without restriction, including without limitation the rights\n"
4834" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4835" * copies of the Software, and to permit persons to whom the Software is\n"
4836" * furnished to do so, subject to the following conditions:\n"
4837" *\n"
4838" * The above copyright notice and this permission notice shall be included in\n"
4839" * all copies or substantial portions of the Software.\n"
4840" *\n"
4841" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4842" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4843" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4844" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4845" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4846" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4847" * THE SOFTWARE.\n"
4848" *\n"
4849" *===-----------------------------------------------------------------------===\n"
4850" */\n"
4851"\n"
4852"/* Only include this if we're compiling for the windows platform. */\n"
4853"#ifndef _MSC_VER\n"
4854"#include_next <arm64intr.h>\n"
4855"#else\n"
4856"\n"
4857"#ifndef __ARM64INTR_H\n"
4858"#define __ARM64INTR_H\n"
4859"\n"
4860"typedef enum\n"
4861"{\n"
4862" _ARM64_BARRIER_SY = 0xF,\n"
4863" _ARM64_BARRIER_ST = 0xE,\n"
4864" _ARM64_BARRIER_LD = 0xD,\n"
4865" _ARM64_BARRIER_ISH = 0xB,\n"
4866" _ARM64_BARRIER_ISHST = 0xA,\n"
4867" _ARM64_BARRIER_ISHLD = 0x9,\n"
4868" _ARM64_BARRIER_NSH = 0x7,\n"
4869" _ARM64_BARRIER_NSHST = 0x6,\n"
4870" _ARM64_BARRIER_NSHLD = 0x5,\n"
4871" _ARM64_BARRIER_OSH = 0x3,\n"
4872" _ARM64_BARRIER_OSHST = 0x2,\n"
4873" _ARM64_BARRIER_OSHLD = 0x1\n"
4874"} _ARM64INTR_BARRIER_TYPE;\n"
4875"\n"
4876"#endif /* __ARM64INTR_H */\n"
4877"#endif /* _MSC_VER */\n"
4878"" } ,
4879 { "/builtins/arm_acle.h" , "/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===\n"
4880" *\n"
4881" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
4882" * of this software and associated documentation files (the \"Software\"), to deal\n"
4883" * in the Software without restriction, including without limitation the rights\n"
4884" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
4885" * copies of the Software, and to permit persons to whom the Software is\n"
4886" * furnished to do so, subject to the following conditions:\n"
4887" *\n"
4888" * The above copyright notice and this permission notice shall be included in\n"
4889" * all copies or substantial portions of the Software.\n"
4890" *\n"
4891" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
4892" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
4893" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
4894" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
4895" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
4896" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
4897" * THE SOFTWARE.\n"
4898" *\n"
4899" *===-----------------------------------------------------------------------===\n"
4900" */\n"
4901"\n"
4902"#ifndef __ARM_ACLE_H\n"
4903"#define __ARM_ACLE_H\n"
4904"\n"
4905"#ifndef __ARM_ACLE\n"
4906"#error \"ACLE intrinsics support not enabled.\"\n"
4907"#endif\n"
4908"\n"
4909"#include <stdint.h>\n"
4910"\n"
4911"#if defined(__cplusplus)\n"
4912"extern \"C\" {\n"
4913"#endif\n"
4914"\n"
4915"/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */\n"
4916"/* 8.3 Memory barriers */\n"
4917"#if !defined(_MSC_VER)\n"
4918"#define __dmb(i) __builtin_arm_dmb(i)\n"
4919"#define __dsb(i) __builtin_arm_dsb(i)\n"
4920"#define __isb(i) __builtin_arm_isb(i)\n"
4921"#endif\n"
4922"\n"
4923"/* 8.4 Hints */\n"
4924"\n"
4925"#if !defined(_MSC_VER)\n"
4926"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {\n"
4927" __builtin_arm_wfi();\n"
4928"}\n"
4929"\n"
4930"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {\n"
4931" __builtin_arm_wfe();\n"
4932"}\n"
4933"\n"
4934"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {\n"
4935" __builtin_arm_sev();\n"
4936"}\n"
4937"\n"
4938"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {\n"
4939" __builtin_arm_sevl();\n"
4940"}\n"
4941"\n"
4942"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {\n"
4943" __builtin_arm_yield();\n"
4944"}\n"
4945"#endif\n"
4946"\n"
4947"#if __ARM_32BIT_STATE\n"
4948"#define __dbg(t) __builtin_arm_dbg(t)\n"
4949"#endif\n"
4950"\n"
4951"/* 8.5 Swap */\n"
4952"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4953"__swp(uint32_t __x, volatile uint32_t *__p) {\n"
4954" uint32_t v;\n"
4955" do\n"
4956" v = __builtin_arm_ldrex(__p);\n"
4957" while (__builtin_arm_strex(__x, __p));\n"
4958" return v;\n"
4959"}\n"
4960"\n"
4961"/* 8.6 Memory prefetch intrinsics */\n"
4962"/* 8.6.1 Data prefetch */\n"
4963"#define __pld(addr) __pldx(0, 0, 0, addr)\n"
4964"\n"
4965"#if __ARM_32BIT_STATE\n"
4966"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4967" __builtin_arm_prefetch(addr, access_kind, 1)\n"
4968"#else\n"
4969"#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n"
4970" __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)\n"
4971"#endif\n"
4972"\n"
4973"/* 8.6.2 Instruction prefetch */\n"
4974"#define __pli(addr) __plix(0, 0, addr)\n"
4975"\n"
4976"#if __ARM_32BIT_STATE\n"
4977"#define __plix(cache_level, retention_policy, addr) \\\n"
4978" __builtin_arm_prefetch(addr, 0, 0)\n"
4979"#else\n"
4980"#define __plix(cache_level, retention_policy, addr) \\\n"
4981" __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)\n"
4982"#endif\n"
4983"\n"
4984"/* 8.7 NOP */\n"
4985"static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {\n"
4986" __builtin_arm_nop();\n"
4987"}\n"
4988"\n"
4989"/* 9 DATA-PROCESSING INTRINSICS */\n"
4990"/* 9.2 Miscellaneous data-processing intrinsics */\n"
4991"/* ROR */\n"
4992"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
4993"__ror(uint32_t __x, uint32_t __y) {\n"
4994" __y %= 32;\n"
4995" if (__y == 0)\n"
4996" return __x;\n"
4997" return (__x >> __y) | (__x << (32 - __y));\n"
4998"}\n"
4999"\n"
5000"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5001"__rorll(uint64_t __x, uint32_t __y) {\n"
5002" __y %= 64;\n"
5003" if (__y == 0)\n"
5004" return __x;\n"
5005" return (__x >> __y) | (__x << (64 - __y));\n"
5006"}\n"
5007"\n"
5008"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5009"__rorl(unsigned long __x, uint32_t __y) {\n"
5010"#if __SIZEOF_LONG__ == 4\n"
5011" return __ror(__x, __y);\n"
5012"#else\n"
5013" return __rorll(__x, __y);\n"
5014"#endif\n"
5015"}\n"
5016"\n"
5017"\n"
5018"/* CLZ */\n"
5019"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5020"__clz(uint32_t __t) {\n"
5021" return __builtin_clz(__t);\n"
5022"}\n"
5023"\n"
5024"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5025"__clzl(unsigned long __t) {\n"
5026" return __builtin_clzl(__t);\n"
5027"}\n"
5028"\n"
5029"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5030"__clzll(uint64_t __t) {\n"
5031" return __builtin_clzll(__t);\n"
5032"}\n"
5033"\n"
5034"/* REV */\n"
5035"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5036"__rev(uint32_t __t) {\n"
5037" return __builtin_bswap32(__t);\n"
5038"}\n"
5039"\n"
5040"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5041"__revl(unsigned long __t) {\n"
5042"#if __SIZEOF_LONG__ == 4\n"
5043" return __builtin_bswap32(__t);\n"
5044"#else\n"
5045" return __builtin_bswap64(__t);\n"
5046"#endif\n"
5047"}\n"
5048"\n"
5049"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5050"__revll(uint64_t __t) {\n"
5051" return __builtin_bswap64(__t);\n"
5052"}\n"
5053"\n"
5054"/* REV16 */\n"
5055"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5056"__rev16(uint32_t __t) {\n"
5057" return __ror(__rev(__t), 16);\n"
5058"}\n"
5059"\n"
5060"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5061"__rev16ll(uint64_t __t) {\n"
5062" return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);\n"
5063"}\n"
5064"\n"
5065"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5066"__rev16l(unsigned long __t) {\n"
5067"#if __SIZEOF_LONG__ == 4\n"
5068" return __rev16(__t);\n"
5069"#else\n"
5070" return __rev16ll(__t);\n"
5071"#endif\n"
5072"}\n"
5073"\n"
5074"/* REVSH */\n"
5075"static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))\n"
5076"__revsh(int16_t __t) {\n"
5077" return __builtin_bswap16(__t);\n"
5078"}\n"
5079"\n"
5080"/* RBIT */\n"
5081"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5082"__rbit(uint32_t __t) {\n"
5083" return __builtin_arm_rbit(__t);\n"
5084"}\n"
5085"\n"
5086"static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n"
5087"__rbitll(uint64_t __t) {\n"
5088"#if __ARM_32BIT_STATE\n"
5089" return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |\n"
5090" __builtin_arm_rbit(__t >> 32);\n"
5091"#else\n"
5092" return __builtin_arm_rbit64(__t);\n"
5093"#endif\n"
5094"}\n"
5095"\n"
5096"static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n"
5097"__rbitl(unsigned long __t) {\n"
5098"#if __SIZEOF_LONG__ == 4\n"
5099" return __rbit(__t);\n"
5100"#else\n"
5101" return __rbitll(__t);\n"
5102"#endif\n"
5103"}\n"
5104"\n"
5105"/*\n"
5106" * 9.3 16-bit multiplications\n"
5107" */\n"
5108"#if __ARM_FEATURE_DSP\n"
5109"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5110"__smulbb(int32_t __a, int32_t __b) {\n"
5111" return __builtin_arm_smulbb(__a, __b);\n"
5112"}\n"
5113"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5114"__smulbt(int32_t __a, int32_t __b) {\n"
5115" return __builtin_arm_smulbt(__a, __b);\n"
5116"}\n"
5117"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5118"__smultb(int32_t __a, int32_t __b) {\n"
5119" return __builtin_arm_smultb(__a, __b);\n"
5120"}\n"
5121"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5122"__smultt(int32_t __a, int32_t __b) {\n"
5123" return __builtin_arm_smultt(__a, __b);\n"
5124"}\n"
5125"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5126"__smulwb(int32_t __a, int32_t __b) {\n"
5127" return __builtin_arm_smulwb(__a, __b);\n"
5128"}\n"
5129"static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n"
5130"__smulwt(int32_t __a, int32_t __b) {\n"
5131" return __builtin_arm_smulwt(__a, __b);\n"
5132"}\n"
5133"#endif\n"
5134"\n"
5135"/*\n"
5136" * 9.4 Saturating intrinsics\n"
5137" *\n"
5138" * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag\n"
5139" * intrinsics are implemented and the flag is enabled.\n"
5140" */\n"
5141"/* 9.4.1 Width-specified saturation intrinsics */\n"
5142"#if __ARM_FEATURE_SAT\n"
5143"#define __ssat(x, y) __builtin_arm_ssat(x, y)\n"
5144"#define __usat(x, y) __builtin_arm_usat(x, y)\n"
5145"#endif\n"
5146"\n"
5147"/* 9.4.2 Saturating addition and subtraction intrinsics */\n"
5148"#if __ARM_FEATURE_DSP\n"
5149"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5150"__qadd(int32_t __t, int32_t __v) {\n"
5151" return __builtin_arm_qadd(__t, __v);\n"
5152"}\n"
5153"\n"
5154"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5155"__qsub(int32_t __t, int32_t __v) {\n"
5156" return __builtin_arm_qsub(__t, __v);\n"
5157"}\n"
5158"\n"
5159"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5160"__qdbl(int32_t __t) {\n"
5161" return __builtin_arm_qadd(__t, __t);\n"
5162"}\n"
5163"#endif\n"
5164"\n"
5165"/* 9.4.3 Accumultating multiplications */\n"
5166"#if __ARM_FEATURE_DSP\n"
5167"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5168"__smlabb(int32_t __a, int32_t __b, int32_t __c) {\n"
5169" return __builtin_arm_smlabb(__a, __b, __c);\n"
5170"}\n"
5171"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5172"__smlabt(int32_t __a, int32_t __b, int32_t __c) {\n"
5173" return __builtin_arm_smlabt(__a, __b, __c);\n"
5174"}\n"
5175"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5176"__smlatb(int32_t __a, int32_t __b, int32_t __c) {\n"
5177" return __builtin_arm_smlatb(__a, __b, __c);\n"
5178"}\n"
5179"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5180"__smlatt(int32_t __a, int32_t __b, int32_t __c) {\n"
5181" return __builtin_arm_smlatt(__a, __b, __c);\n"
5182"}\n"
5183"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5184"__smlawb(int32_t __a, int32_t __b, int32_t __c) {\n"
5185" return __builtin_arm_smlawb(__a, __b, __c);\n"
5186"}\n"
5187"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5188"__smlawt(int32_t __a, int32_t __b, int32_t __c) {\n"
5189" return __builtin_arm_smlawt(__a, __b, __c);\n"
5190"}\n"
5191"#endif\n"
5192"\n"
5193"\n"
5194"/* 9.5.4 Parallel 16-bit saturation */\n"
5195"#if __ARM_FEATURE_SIMD32\n"
5196"#define __ssat16(x, y) __builtin_arm_ssat16(x, y)\n"
5197"#define __usat16(x, y) __builtin_arm_usat16(x, y)\n"
5198"#endif\n"
5199"\n"
5200"/* 9.5.5 Packing and unpacking */\n"
5201"#if __ARM_FEATURE_SIMD32\n"
5202"typedef int32_t int8x4_t;\n"
5203"typedef int32_t int16x2_t;\n"
5204"typedef uint32_t uint8x4_t;\n"
5205"typedef uint32_t uint16x2_t;\n"
5206"\n"
5207"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5208"__sxtab16(int16x2_t __a, int8x4_t __b) {\n"
5209" return __builtin_arm_sxtab16(__a, __b);\n"
5210"}\n"
5211"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5212"__sxtb16(int8x4_t __a) {\n"
5213" return __builtin_arm_sxtb16(__a);\n"
5214"}\n"
5215"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5216"__uxtab16(int16x2_t __a, int8x4_t __b) {\n"
5217" return __builtin_arm_uxtab16(__a, __b);\n"
5218"}\n"
5219"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5220"__uxtb16(int8x4_t __a) {\n"
5221" return __builtin_arm_uxtb16(__a);\n"
5222"}\n"
5223"#endif\n"
5224"\n"
5225"/* 9.5.6 Parallel selection */\n"
5226"#if __ARM_FEATURE_SIMD32\n"
5227"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5228"__sel(uint8x4_t __a, uint8x4_t __b) {\n"
5229" return __builtin_arm_sel(__a, __b);\n"
5230"}\n"
5231"#endif\n"
5232"\n"
5233"/* 9.5.7 Parallel 8-bit addition and subtraction */\n"
5234"#if __ARM_FEATURE_SIMD32\n"
5235"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5236"__qadd8(int8x4_t __a, int8x4_t __b) {\n"
5237" return __builtin_arm_qadd8(__a, __b);\n"
5238"}\n"
5239"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5240"__qsub8(int8x4_t __a, int8x4_t __b) {\n"
5241" return __builtin_arm_qsub8(__a, __b);\n"
5242"}\n"
5243"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5244"__sadd8(int8x4_t __a, int8x4_t __b) {\n"
5245" return __builtin_arm_sadd8(__a, __b);\n"
5246"}\n"
5247"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5248"__shadd8(int8x4_t __a, int8x4_t __b) {\n"
5249" return __builtin_arm_shadd8(__a, __b);\n"
5250"}\n"
5251"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5252"__shsub8(int8x4_t __a, int8x4_t __b) {\n"
5253" return __builtin_arm_shsub8(__a, __b);\n"
5254"}\n"
5255"static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5256"__ssub8(int8x4_t __a, int8x4_t __b) {\n"
5257" return __builtin_arm_ssub8(__a, __b);\n"
5258"}\n"
5259"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5260"__uadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5261" return __builtin_arm_uadd8(__a, __b);\n"
5262"}\n"
5263"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5264"__uhadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5265" return __builtin_arm_uhadd8(__a, __b);\n"
5266"}\n"
5267"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5268"__uhsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5269" return __builtin_arm_uhsub8(__a, __b);\n"
5270"}\n"
5271"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5272"__uqadd8(uint8x4_t __a, uint8x4_t __b) {\n"
5273" return __builtin_arm_uqadd8(__a, __b);\n"
5274"}\n"
5275"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5276"__uqsub8(uint8x4_t __a, uint8x4_t __b) {\n"
5277" return __builtin_arm_uqsub8(__a, __b);\n"
5278"}\n"
5279"static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n"
5280"__usub8(uint8x4_t __a, uint8x4_t __b) {\n"
5281" return __builtin_arm_usub8(__a, __b);\n"
5282"}\n"
5283"#endif\n"
5284"\n"
5285"/* 9.5.8 Sum of 8-bit absolute differences */\n"
5286"#if __ARM_FEATURE_SIMD32\n"
5287"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5288"__usad8(uint8x4_t __a, uint8x4_t __b) {\n"
5289" return __builtin_arm_usad8(__a, __b);\n"
5290"}\n"
5291"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5292"__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {\n"
5293" return __builtin_arm_usada8(__a, __b, __c);\n"
5294"}\n"
5295"#endif\n"
5296"\n"
5297"/* 9.5.9 Parallel 16-bit addition and subtraction */\n"
5298"#if __ARM_FEATURE_SIMD32\n"
5299"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5300"__qadd16(int16x2_t __a, int16x2_t __b) {\n"
5301" return __builtin_arm_qadd16(__a, __b);\n"
5302"}\n"
5303"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5304"__qasx(int16x2_t __a, int16x2_t __b) {\n"
5305" return __builtin_arm_qasx(__a, __b);\n"
5306"}\n"
5307"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5308"__qsax(int16x2_t __a, int16x2_t __b) {\n"
5309" return __builtin_arm_qsax(__a, __b);\n"
5310"}\n"
5311"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5312"__qsub16(int16x2_t __a, int16x2_t __b) {\n"
5313" return __builtin_arm_qsub16(__a, __b);\n"
5314"}\n"
5315"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5316"__sadd16(int16x2_t __a, int16x2_t __b) {\n"
5317" return __builtin_arm_sadd16(__a, __b);\n"
5318"}\n"
5319"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5320"__sasx(int16x2_t __a, int16x2_t __b) {\n"
5321" return __builtin_arm_sasx(__a, __b);\n"
5322"}\n"
5323"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5324"__shadd16(int16x2_t __a, int16x2_t __b) {\n"
5325" return __builtin_arm_shadd16(__a, __b);\n"
5326"}\n"
5327"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5328"__shasx(int16x2_t __a, int16x2_t __b) {\n"
5329" return __builtin_arm_shasx(__a, __b);\n"
5330"}\n"
5331"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5332"__shsax(int16x2_t __a, int16x2_t __b) {\n"
5333" return __builtin_arm_shsax(__a, __b);\n"
5334"}\n"
5335"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5336"__shsub16(int16x2_t __a, int16x2_t __b) {\n"
5337" return __builtin_arm_shsub16(__a, __b);\n"
5338"}\n"
5339"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5340"__ssax(int16x2_t __a, int16x2_t __b) {\n"
5341" return __builtin_arm_ssax(__a, __b);\n"
5342"}\n"
5343"static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5344"__ssub16(int16x2_t __a, int16x2_t __b) {\n"
5345" return __builtin_arm_ssub16(__a, __b);\n"
5346"}\n"
5347"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5348"__uadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5349" return __builtin_arm_uadd16(__a, __b);\n"
5350"}\n"
5351"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5352"__uasx(uint16x2_t __a, uint16x2_t __b) {\n"
5353" return __builtin_arm_uasx(__a, __b);\n"
5354"}\n"
5355"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5356"__uhadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5357" return __builtin_arm_uhadd16(__a, __b);\n"
5358"}\n"
5359"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5360"__uhasx(uint16x2_t __a, uint16x2_t __b) {\n"
5361" return __builtin_arm_uhasx(__a, __b);\n"
5362"}\n"
5363"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5364"__uhsax(uint16x2_t __a, uint16x2_t __b) {\n"
5365" return __builtin_arm_uhsax(__a, __b);\n"
5366"}\n"
5367"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5368"__uhsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5369" return __builtin_arm_uhsub16(__a, __b);\n"
5370"}\n"
5371"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5372"__uqadd16(uint16x2_t __a, uint16x2_t __b) {\n"
5373" return __builtin_arm_uqadd16(__a, __b);\n"
5374"}\n"
5375"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5376"__uqasx(uint16x2_t __a, uint16x2_t __b) {\n"
5377" return __builtin_arm_uqasx(__a, __b);\n"
5378"}\n"
5379"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5380"__uqsax(uint16x2_t __a, uint16x2_t __b) {\n"
5381" return __builtin_arm_uqsax(__a, __b);\n"
5382"}\n"
5383"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5384"__uqsub16(uint16x2_t __a, uint16x2_t __b) {\n"
5385" return __builtin_arm_uqsub16(__a, __b);\n"
5386"}\n"
5387"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5388"__usax(uint16x2_t __a, uint16x2_t __b) {\n"
5389" return __builtin_arm_usax(__a, __b);\n"
5390"}\n"
5391"static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n"
5392"__usub16(uint16x2_t __a, uint16x2_t __b) {\n"
5393" return __builtin_arm_usub16(__a, __b);\n"
5394"}\n"
5395"#endif\n"
5396"\n"
5397"/* 9.5.10 Parallel 16-bit multiplications */\n"
5398"#if __ARM_FEATURE_SIMD32\n"
5399"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5400"__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5401" return __builtin_arm_smlad(__a, __b, __c);\n"
5402"}\n"
5403"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5404"__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5405" return __builtin_arm_smladx(__a, __b, __c);\n"
5406"}\n"
5407"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5408"__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5409" return __builtin_arm_smlald(__a, __b, __c);\n"
5410"}\n"
5411"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5412"__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5413" return __builtin_arm_smlaldx(__a, __b, __c);\n"
5414"}\n"
5415"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5416"__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5417" return __builtin_arm_smlsd(__a, __b, __c);\n"
5418"}\n"
5419"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5420"__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n"
5421" return __builtin_arm_smlsdx(__a, __b, __c);\n"
5422"}\n"
5423"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5424"__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5425" return __builtin_arm_smlsld(__a, __b, __c);\n"
5426"}\n"
5427"static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n"
5428"__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n"
5429" return __builtin_arm_smlsldx(__a, __b, __c);\n"
5430"}\n"
5431"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5432"__smuad(int16x2_t __a, int16x2_t __b) {\n"
5433" return __builtin_arm_smuad(__a, __b);\n"
5434"}\n"
5435"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5436"__smuadx(int16x2_t __a, int16x2_t __b) {\n"
5437" return __builtin_arm_smuadx(__a, __b);\n"
5438"}\n"
5439"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5440"__smusd(int16x2_t __a, int16x2_t __b) {\n"
5441" return __builtin_arm_smusd(__a, __b);\n"
5442"}\n"
5443"static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n"
5444"__smusdx(int16x2_t __a, int16x2_t __b) {\n"
5445" return __builtin_arm_smusdx(__a, __b);\n"
5446"}\n"
5447"#endif\n"
5448"\n"
5449"/* 9.7 CRC32 intrinsics */\n"
5450"#if __ARM_FEATURE_CRC32\n"
5451"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5452"__crc32b(uint32_t __a, uint8_t __b) {\n"
5453" return __builtin_arm_crc32b(__a, __b);\n"
5454"}\n"
5455"\n"
5456"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5457"__crc32h(uint32_t __a, uint16_t __b) {\n"
5458" return __builtin_arm_crc32h(__a, __b);\n"
5459"}\n"
5460"\n"
5461"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5462"__crc32w(uint32_t __a, uint32_t __b) {\n"
5463" return __builtin_arm_crc32w(__a, __b);\n"
5464"}\n"
5465"\n"
5466"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5467"__crc32d(uint32_t __a, uint64_t __b) {\n"
5468" return __builtin_arm_crc32d(__a, __b);\n"
5469"}\n"
5470"\n"
5471"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5472"__crc32cb(uint32_t __a, uint8_t __b) {\n"
5473" return __builtin_arm_crc32cb(__a, __b);\n"
5474"}\n"
5475"\n"
5476"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5477"__crc32ch(uint32_t __a, uint16_t __b) {\n"
5478" return __builtin_arm_crc32ch(__a, __b);\n"
5479"}\n"
5480"\n"
5481"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5482"__crc32cw(uint32_t __a, uint32_t __b) {\n"
5483" return __builtin_arm_crc32cw(__a, __b);\n"
5484"}\n"
5485"\n"
5486"static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n"
5487"__crc32cd(uint32_t __a, uint64_t __b) {\n"
5488" return __builtin_arm_crc32cd(__a, __b);\n"
5489"}\n"
5490"#endif\n"
5491"\n"
5492"/* 10.1 Special register intrinsics */\n"
5493"#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)\n"
5494"#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)\n"
5495"#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)\n"
5496"#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)\n"
5497"#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)\n"
5498"#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)\n"
5499"\n"
5500"#if defined(__cplusplus)\n"
5501"}\n"
5502"#endif\n"
5503"\n"
5504"#endif /* __ARM_ACLE_H */\n"
5505"" } ,
5506 { "/builtins/arm_fp16.h" , "/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===\n"
5507" *\n"
5508" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
5509" * of this software and associated documentation files (the \"Software\"), to deal\n"
5510" * in the Software without restriction, including without limitation the rights\n"
5511" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
5512" * copies of the Software, and to permit persons to whom the Software is\n"
5513" * furnished to do so, subject to the following conditions:\n"
5514" *\n"
5515" * The above copyright notice and this permission notice shall be included in\n"
5516" * all copies or substantial portions of the Software.\n"
5517" *\n"
5518" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
5519" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
5520" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
5521" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
5522" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
5523" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
5524" * THE SOFTWARE.\n"
5525" *\n"
5526" *===-----------------------------------------------------------------------===\n"
5527" */\n"
5528"\n"
5529"#ifndef __ARM_FP16_H\n"
5530"#define __ARM_FP16_H\n"
5531"\n"
5532"#include <stdint.h>\n"
5533"\n"
5534"typedef __fp16 float16_t;\n"
5535"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n"
5536"\n"
5537"#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)\n"
5538"#ifdef __LITTLE_ENDIAN__\n"
5539"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5540" float16_t __s0 = __p0; \\\n"
5541" float16_t __s1 = __p1; \\\n"
5542" float16_t __ret; \\\n"
5543" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5544" __ret; \\\n"
5545"})\n"
5546"#else\n"
5547"#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n"
5548" float16_t __s0 = __p0; \\\n"
5549" float16_t __s1 = __p1; \\\n"
5550" float16_t __ret; \\\n"
5551" __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n"
5552" __ret; \\\n"
5553"})\n"
5554"#endif\n"
5555"\n"
5556"#ifdef __LITTLE_ENDIAN__\n"
5557"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5558" float16_t __s0 = __p0; \\\n"
5559" float16_t __ret; \\\n"
5560" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5561" __ret; \\\n"
5562"})\n"
5563"#else\n"
5564"#define vabsh_f16(__p0) __extension__ ({ \\\n"
5565" float16_t __s0 = __p0; \\\n"
5566" float16_t __ret; \\\n"
5567" __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n"
5568" __ret; \\\n"
5569"})\n"
5570"#endif\n"
5571"\n"
5572"#ifdef __LITTLE_ENDIAN__\n"
5573"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5574" float16_t __s0 = __p0; \\\n"
5575" float16_t __s1 = __p1; \\\n"
5576" float16_t __ret; \\\n"
5577" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5578" __ret; \\\n"
5579"})\n"
5580"#else\n"
5581"#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n"
5582" float16_t __s0 = __p0; \\\n"
5583" float16_t __s1 = __p1; \\\n"
5584" float16_t __ret; \\\n"
5585" __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n"
5586" __ret; \\\n"
5587"})\n"
5588"#endif\n"
5589"\n"
5590"#ifdef __LITTLE_ENDIAN__\n"
5591"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5592" float16_t __s0 = __p0; \\\n"
5593" float16_t __s1 = __p1; \\\n"
5594" uint16_t __ret; \\\n"
5595" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5596" __ret; \\\n"
5597"})\n"
5598"#else\n"
5599"#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n"
5600" float16_t __s0 = __p0; \\\n"
5601" float16_t __s1 = __p1; \\\n"
5602" uint16_t __ret; \\\n"
5603" __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n"
5604" __ret; \\\n"
5605"})\n"
5606"#endif\n"
5607"\n"
5608"#ifdef __LITTLE_ENDIAN__\n"
5609"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5610" float16_t __s0 = __p0; \\\n"
5611" float16_t __s1 = __p1; \\\n"
5612" uint16_t __ret; \\\n"
5613" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5614" __ret; \\\n"
5615"})\n"
5616"#else\n"
5617"#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n"
5618" float16_t __s0 = __p0; \\\n"
5619" float16_t __s1 = __p1; \\\n"
5620" uint16_t __ret; \\\n"
5621" __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n"
5622" __ret; \\\n"
5623"})\n"
5624"#endif\n"
5625"\n"
5626"#ifdef __LITTLE_ENDIAN__\n"
5627"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5628" float16_t __s0 = __p0; \\\n"
5629" float16_t __s1 = __p1; \\\n"
5630" uint16_t __ret; \\\n"
5631" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5632" __ret; \\\n"
5633"})\n"
5634"#else\n"
5635"#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n"
5636" float16_t __s0 = __p0; \\\n"
5637" float16_t __s1 = __p1; \\\n"
5638" uint16_t __ret; \\\n"
5639" __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n"
5640" __ret; \\\n"
5641"})\n"
5642"#endif\n"
5643"\n"
5644"#ifdef __LITTLE_ENDIAN__\n"
5645"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5646" float16_t __s0 = __p0; \\\n"
5647" float16_t __s1 = __p1; \\\n"
5648" uint16_t __ret; \\\n"
5649" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5650" __ret; \\\n"
5651"})\n"
5652"#else\n"
5653"#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n"
5654" float16_t __s0 = __p0; \\\n"
5655" float16_t __s1 = __p1; \\\n"
5656" uint16_t __ret; \\\n"
5657" __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n"
5658" __ret; \\\n"
5659"})\n"
5660"#endif\n"
5661"\n"
5662"#ifdef __LITTLE_ENDIAN__\n"
5663"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5664" float16_t __s0 = __p0; \\\n"
5665" float16_t __s1 = __p1; \\\n"
5666" uint16_t __ret; \\\n"
5667" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5668" __ret; \\\n"
5669"})\n"
5670"#else\n"
5671"#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n"
5672" float16_t __s0 = __p0; \\\n"
5673" float16_t __s1 = __p1; \\\n"
5674" uint16_t __ret; \\\n"
5675" __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n"
5676" __ret; \\\n"
5677"})\n"
5678"#endif\n"
5679"\n"
5680"#ifdef __LITTLE_ENDIAN__\n"
5681"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5682" float16_t __s0 = __p0; \\\n"
5683" uint16_t __ret; \\\n"
5684" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5685" __ret; \\\n"
5686"})\n"
5687"#else\n"
5688"#define vceqzh_f16(__p0) __extension__ ({ \\\n"
5689" float16_t __s0 = __p0; \\\n"
5690" uint16_t __ret; \\\n"
5691" __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n"
5692" __ret; \\\n"
5693"})\n"
5694"#endif\n"
5695"\n"
5696"#ifdef __LITTLE_ENDIAN__\n"
5697"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5698" float16_t __s0 = __p0; \\\n"
5699" float16_t __s1 = __p1; \\\n"
5700" uint16_t __ret; \\\n"
5701" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5702" __ret; \\\n"
5703"})\n"
5704"#else\n"
5705"#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n"
5706" float16_t __s0 = __p0; \\\n"
5707" float16_t __s1 = __p1; \\\n"
5708" uint16_t __ret; \\\n"
5709" __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n"
5710" __ret; \\\n"
5711"})\n"
5712"#endif\n"
5713"\n"
5714"#ifdef __LITTLE_ENDIAN__\n"
5715"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5716" float16_t __s0 = __p0; \\\n"
5717" uint16_t __ret; \\\n"
5718" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5719" __ret; \\\n"
5720"})\n"
5721"#else\n"
5722"#define vcgezh_f16(__p0) __extension__ ({ \\\n"
5723" float16_t __s0 = __p0; \\\n"
5724" uint16_t __ret; \\\n"
5725" __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n"
5726" __ret; \\\n"
5727"})\n"
5728"#endif\n"
5729"\n"
5730"#ifdef __LITTLE_ENDIAN__\n"
5731"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5732" float16_t __s0 = __p0; \\\n"
5733" float16_t __s1 = __p1; \\\n"
5734" uint16_t __ret; \\\n"
5735" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5736" __ret; \\\n"
5737"})\n"
5738"#else\n"
5739"#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n"
5740" float16_t __s0 = __p0; \\\n"
5741" float16_t __s1 = __p1; \\\n"
5742" uint16_t __ret; \\\n"
5743" __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n"
5744" __ret; \\\n"
5745"})\n"
5746"#endif\n"
5747"\n"
5748"#ifdef __LITTLE_ENDIAN__\n"
5749"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5750" float16_t __s0 = __p0; \\\n"
5751" uint16_t __ret; \\\n"
5752" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5753" __ret; \\\n"
5754"})\n"
5755"#else\n"
5756"#define vcgtzh_f16(__p0) __extension__ ({ \\\n"
5757" float16_t __s0 = __p0; \\\n"
5758" uint16_t __ret; \\\n"
5759" __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n"
5760" __ret; \\\n"
5761"})\n"
5762"#endif\n"
5763"\n"
5764"#ifdef __LITTLE_ENDIAN__\n"
5765"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5766" float16_t __s0 = __p0; \\\n"
5767" float16_t __s1 = __p1; \\\n"
5768" uint16_t __ret; \\\n"
5769" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5770" __ret; \\\n"
5771"})\n"
5772"#else\n"
5773"#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n"
5774" float16_t __s0 = __p0; \\\n"
5775" float16_t __s1 = __p1; \\\n"
5776" uint16_t __ret; \\\n"
5777" __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n"
5778" __ret; \\\n"
5779"})\n"
5780"#endif\n"
5781"\n"
5782"#ifdef __LITTLE_ENDIAN__\n"
5783"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5784" float16_t __s0 = __p0; \\\n"
5785" uint16_t __ret; \\\n"
5786" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5787" __ret; \\\n"
5788"})\n"
5789"#else\n"
5790"#define vclezh_f16(__p0) __extension__ ({ \\\n"
5791" float16_t __s0 = __p0; \\\n"
5792" uint16_t __ret; \\\n"
5793" __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n"
5794" __ret; \\\n"
5795"})\n"
5796"#endif\n"
5797"\n"
5798"#ifdef __LITTLE_ENDIAN__\n"
5799"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5800" float16_t __s0 = __p0; \\\n"
5801" float16_t __s1 = __p1; \\\n"
5802" uint16_t __ret; \\\n"
5803" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5804" __ret; \\\n"
5805"})\n"
5806"#else\n"
5807"#define vclth_f16(__p0, __p1) __extension__ ({ \\\n"
5808" float16_t __s0 = __p0; \\\n"
5809" float16_t __s1 = __p1; \\\n"
5810" uint16_t __ret; \\\n"
5811" __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n"
5812" __ret; \\\n"
5813"})\n"
5814"#endif\n"
5815"\n"
5816"#ifdef __LITTLE_ENDIAN__\n"
5817"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5818" float16_t __s0 = __p0; \\\n"
5819" uint16_t __ret; \\\n"
5820" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5821" __ret; \\\n"
5822"})\n"
5823"#else\n"
5824"#define vcltzh_f16(__p0) __extension__ ({ \\\n"
5825" float16_t __s0 = __p0; \\\n"
5826" uint16_t __ret; \\\n"
5827" __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n"
5828" __ret; \\\n"
5829"})\n"
5830"#endif\n"
5831"\n"
5832"#ifdef __LITTLE_ENDIAN__\n"
5833"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5834" float16_t __s0 = __p0; \\\n"
5835" int16_t __ret; \\\n"
5836" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5837" __ret; \\\n"
5838"})\n"
5839"#else\n"
5840"#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n"
5841" float16_t __s0 = __p0; \\\n"
5842" int16_t __ret; \\\n"
5843" __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n"
5844" __ret; \\\n"
5845"})\n"
5846"#endif\n"
5847"\n"
5848"#ifdef __LITTLE_ENDIAN__\n"
5849"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5850" float16_t __s0 = __p0; \\\n"
5851" int32_t __ret; \\\n"
5852" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5853" __ret; \\\n"
5854"})\n"
5855"#else\n"
5856"#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n"
5857" float16_t __s0 = __p0; \\\n"
5858" int32_t __ret; \\\n"
5859" __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n"
5860" __ret; \\\n"
5861"})\n"
5862"#endif\n"
5863"\n"
5864"#ifdef __LITTLE_ENDIAN__\n"
5865"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5866" float16_t __s0 = __p0; \\\n"
5867" int64_t __ret; \\\n"
5868" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5869" __ret; \\\n"
5870"})\n"
5871"#else\n"
5872"#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n"
5873" float16_t __s0 = __p0; \\\n"
5874" int64_t __ret; \\\n"
5875" __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n"
5876" __ret; \\\n"
5877"})\n"
5878"#endif\n"
5879"\n"
5880"#ifdef __LITTLE_ENDIAN__\n"
5881"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5882" float16_t __s0 = __p0; \\\n"
5883" uint16_t __ret; \\\n"
5884" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5885" __ret; \\\n"
5886"})\n"
5887"#else\n"
5888"#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n"
5889" float16_t __s0 = __p0; \\\n"
5890" uint16_t __ret; \\\n"
5891" __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n"
5892" __ret; \\\n"
5893"})\n"
5894"#endif\n"
5895"\n"
5896"#ifdef __LITTLE_ENDIAN__\n"
5897"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5898" float16_t __s0 = __p0; \\\n"
5899" uint32_t __ret; \\\n"
5900" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5901" __ret; \\\n"
5902"})\n"
5903"#else\n"
5904"#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n"
5905" float16_t __s0 = __p0; \\\n"
5906" uint32_t __ret; \\\n"
5907" __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n"
5908" __ret; \\\n"
5909"})\n"
5910"#endif\n"
5911"\n"
5912"#ifdef __LITTLE_ENDIAN__\n"
5913"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5914" float16_t __s0 = __p0; \\\n"
5915" uint64_t __ret; \\\n"
5916" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5917" __ret; \\\n"
5918"})\n"
5919"#else\n"
5920"#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n"
5921" float16_t __s0 = __p0; \\\n"
5922" uint64_t __ret; \\\n"
5923" __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n"
5924" __ret; \\\n"
5925"})\n"
5926"#endif\n"
5927"\n"
5928"#ifdef __LITTLE_ENDIAN__\n"
5929"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5930" float16_t __s0 = __p0; \\\n"
5931" int16_t __ret; \\\n"
5932" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5933" __ret; \\\n"
5934"})\n"
5935"#else\n"
5936"#define vcvth_s16_f16(__p0) __extension__ ({ \\\n"
5937" float16_t __s0 = __p0; \\\n"
5938" int16_t __ret; \\\n"
5939" __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n"
5940" __ret; \\\n"
5941"})\n"
5942"#endif\n"
5943"\n"
5944"#ifdef __LITTLE_ENDIAN__\n"
5945"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5946" float16_t __s0 = __p0; \\\n"
5947" int32_t __ret; \\\n"
5948" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5949" __ret; \\\n"
5950"})\n"
5951"#else\n"
5952"#define vcvth_s32_f16(__p0) __extension__ ({ \\\n"
5953" float16_t __s0 = __p0; \\\n"
5954" int32_t __ret; \\\n"
5955" __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n"
5956" __ret; \\\n"
5957"})\n"
5958"#endif\n"
5959"\n"
5960"#ifdef __LITTLE_ENDIAN__\n"
5961"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5962" float16_t __s0 = __p0; \\\n"
5963" int64_t __ret; \\\n"
5964" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5965" __ret; \\\n"
5966"})\n"
5967"#else\n"
5968"#define vcvth_s64_f16(__p0) __extension__ ({ \\\n"
5969" float16_t __s0 = __p0; \\\n"
5970" int64_t __ret; \\\n"
5971" __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n"
5972" __ret; \\\n"
5973"})\n"
5974"#endif\n"
5975"\n"
5976"#ifdef __LITTLE_ENDIAN__\n"
5977"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5978" float16_t __s0 = __p0; \\\n"
5979" uint16_t __ret; \\\n"
5980" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5981" __ret; \\\n"
5982"})\n"
5983"#else\n"
5984"#define vcvth_u16_f16(__p0) __extension__ ({ \\\n"
5985" float16_t __s0 = __p0; \\\n"
5986" uint16_t __ret; \\\n"
5987" __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n"
5988" __ret; \\\n"
5989"})\n"
5990"#endif\n"
5991"\n"
5992"#ifdef __LITTLE_ENDIAN__\n"
5993"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
5994" float16_t __s0 = __p0; \\\n"
5995" uint32_t __ret; \\\n"
5996" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
5997" __ret; \\\n"
5998"})\n"
5999"#else\n"
6000"#define vcvth_u32_f16(__p0) __extension__ ({ \\\n"
6001" float16_t __s0 = __p0; \\\n"
6002" uint32_t __ret; \\\n"
6003" __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n"
6004" __ret; \\\n"
6005"})\n"
6006"#endif\n"
6007"\n"
6008"#ifdef __LITTLE_ENDIAN__\n"
6009"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6010" float16_t __s0 = __p0; \\\n"
6011" uint64_t __ret; \\\n"
6012" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6013" __ret; \\\n"
6014"})\n"
6015"#else\n"
6016"#define vcvth_u64_f16(__p0) __extension__ ({ \\\n"
6017" float16_t __s0 = __p0; \\\n"
6018" uint64_t __ret; \\\n"
6019" __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n"
6020" __ret; \\\n"
6021"})\n"
6022"#endif\n"
6023"\n"
6024"#ifdef __LITTLE_ENDIAN__\n"
6025"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6026" float16_t __s0 = __p0; \\\n"
6027" int16_t __ret; \\\n"
6028" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6029" __ret; \\\n"
6030"})\n"
6031"#else\n"
6032"#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n"
6033" float16_t __s0 = __p0; \\\n"
6034" int16_t __ret; \\\n"
6035" __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n"
6036" __ret; \\\n"
6037"})\n"
6038"#endif\n"
6039"\n"
6040"#ifdef __LITTLE_ENDIAN__\n"
6041"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6042" float16_t __s0 = __p0; \\\n"
6043" int32_t __ret; \\\n"
6044" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6045" __ret; \\\n"
6046"})\n"
6047"#else\n"
6048"#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n"
6049" float16_t __s0 = __p0; \\\n"
6050" int32_t __ret; \\\n"
6051" __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n"
6052" __ret; \\\n"
6053"})\n"
6054"#endif\n"
6055"\n"
6056"#ifdef __LITTLE_ENDIAN__\n"
6057"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6058" float16_t __s0 = __p0; \\\n"
6059" int64_t __ret; \\\n"
6060" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6061" __ret; \\\n"
6062"})\n"
6063"#else\n"
6064"#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n"
6065" float16_t __s0 = __p0; \\\n"
6066" int64_t __ret; \\\n"
6067" __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n"
6068" __ret; \\\n"
6069"})\n"
6070"#endif\n"
6071"\n"
6072"#ifdef __LITTLE_ENDIAN__\n"
6073"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6074" float16_t __s0 = __p0; \\\n"
6075" uint16_t __ret; \\\n"
6076" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6077" __ret; \\\n"
6078"})\n"
6079"#else\n"
6080"#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n"
6081" float16_t __s0 = __p0; \\\n"
6082" uint16_t __ret; \\\n"
6083" __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n"
6084" __ret; \\\n"
6085"})\n"
6086"#endif\n"
6087"\n"
6088"#ifdef __LITTLE_ENDIAN__\n"
6089"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6090" float16_t __s0 = __p0; \\\n"
6091" uint32_t __ret; \\\n"
6092" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6093" __ret; \\\n"
6094"})\n"
6095"#else\n"
6096"#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n"
6097" float16_t __s0 = __p0; \\\n"
6098" uint32_t __ret; \\\n"
6099" __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n"
6100" __ret; \\\n"
6101"})\n"
6102"#endif\n"
6103"\n"
6104"#ifdef __LITTLE_ENDIAN__\n"
6105"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6106" float16_t __s0 = __p0; \\\n"
6107" uint64_t __ret; \\\n"
6108" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6109" __ret; \\\n"
6110"})\n"
6111"#else\n"
6112"#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n"
6113" float16_t __s0 = __p0; \\\n"
6114" uint64_t __ret; \\\n"
6115" __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n"
6116" __ret; \\\n"
6117"})\n"
6118"#endif\n"
6119"\n"
6120"#ifdef __LITTLE_ENDIAN__\n"
6121"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6122" float16_t __ret;\n"
6123" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6124" return __ret;\n"
6125"}\n"
6126"#else\n"
6127"__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n"
6128" float16_t __ret;\n"
6129" __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n"
6130" return __ret;\n"
6131"}\n"
6132"#endif\n"
6133"\n"
6134"#ifdef __LITTLE_ENDIAN__\n"
6135"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6136" float16_t __ret;\n"
6137" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6138" return __ret;\n"
6139"}\n"
6140"#else\n"
6141"__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n"
6142" float16_t __ret;\n"
6143" __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n"
6144" return __ret;\n"
6145"}\n"
6146"#endif\n"
6147"\n"
6148"#ifdef __LITTLE_ENDIAN__\n"
6149"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6150" float16_t __ret;\n"
6151" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6152" return __ret;\n"
6153"}\n"
6154"#else\n"
6155"__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n"
6156" float16_t __ret;\n"
6157" __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n"
6158" return __ret;\n"
6159"}\n"
6160"#endif\n"
6161"\n"
6162"#ifdef __LITTLE_ENDIAN__\n"
6163"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6164" float16_t __ret;\n"
6165" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6166" return __ret;\n"
6167"}\n"
6168"#else\n"
6169"__ai float16_t vcvth_f16_s32(int32_t __p0) {\n"
6170" float16_t __ret;\n"
6171" __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n"
6172" return __ret;\n"
6173"}\n"
6174"#endif\n"
6175"\n"
6176"#ifdef __LITTLE_ENDIAN__\n"
6177"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6178" float16_t __ret;\n"
6179" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6180" return __ret;\n"
6181"}\n"
6182"#else\n"
6183"__ai float16_t vcvth_f16_s64(int64_t __p0) {\n"
6184" float16_t __ret;\n"
6185" __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n"
6186" return __ret;\n"
6187"}\n"
6188"#endif\n"
6189"\n"
6190"#ifdef __LITTLE_ENDIAN__\n"
6191"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6192" float16_t __ret;\n"
6193" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6194" return __ret;\n"
6195"}\n"
6196"#else\n"
6197"__ai float16_t vcvth_f16_s16(int16_t __p0) {\n"
6198" float16_t __ret;\n"
6199" __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n"
6200" return __ret;\n"
6201"}\n"
6202"#endif\n"
6203"\n"
6204"#ifdef __LITTLE_ENDIAN__\n"
6205"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6206" uint32_t __s0 = __p0; \\\n"
6207" float16_t __ret; \\\n"
6208" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6209" __ret; \\\n"
6210"})\n"
6211"#else\n"
6212"#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n"
6213" uint32_t __s0 = __p0; \\\n"
6214" float16_t __ret; \\\n"
6215" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n"
6216" __ret; \\\n"
6217"})\n"
6218"#endif\n"
6219"\n"
6220"#ifdef __LITTLE_ENDIAN__\n"
6221"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6222" uint64_t __s0 = __p0; \\\n"
6223" float16_t __ret; \\\n"
6224" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6225" __ret; \\\n"
6226"})\n"
6227"#else\n"
6228"#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n"
6229" uint64_t __s0 = __p0; \\\n"
6230" float16_t __ret; \\\n"
6231" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n"
6232" __ret; \\\n"
6233"})\n"
6234"#endif\n"
6235"\n"
6236"#ifdef __LITTLE_ENDIAN__\n"
6237"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6238" uint16_t __s0 = __p0; \\\n"
6239" float16_t __ret; \\\n"
6240" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6241" __ret; \\\n"
6242"})\n"
6243"#else\n"
6244"#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n"
6245" uint16_t __s0 = __p0; \\\n"
6246" float16_t __ret; \\\n"
6247" __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n"
6248" __ret; \\\n"
6249"})\n"
6250"#endif\n"
6251"\n"
6252"#ifdef __LITTLE_ENDIAN__\n"
6253"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6254" int32_t __s0 = __p0; \\\n"
6255" float16_t __ret; \\\n"
6256" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6257" __ret; \\\n"
6258"})\n"
6259"#else\n"
6260"#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n"
6261" int32_t __s0 = __p0; \\\n"
6262" float16_t __ret; \\\n"
6263" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n"
6264" __ret; \\\n"
6265"})\n"
6266"#endif\n"
6267"\n"
6268"#ifdef __LITTLE_ENDIAN__\n"
6269"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6270" int64_t __s0 = __p0; \\\n"
6271" float16_t __ret; \\\n"
6272" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6273" __ret; \\\n"
6274"})\n"
6275"#else\n"
6276"#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n"
6277" int64_t __s0 = __p0; \\\n"
6278" float16_t __ret; \\\n"
6279" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n"
6280" __ret; \\\n"
6281"})\n"
6282"#endif\n"
6283"\n"
6284"#ifdef __LITTLE_ENDIAN__\n"
6285"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6286" int16_t __s0 = __p0; \\\n"
6287" float16_t __ret; \\\n"
6288" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6289" __ret; \\\n"
6290"})\n"
6291"#else\n"
6292"#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n"
6293" int16_t __s0 = __p0; \\\n"
6294" float16_t __ret; \\\n"
6295" __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n"
6296" __ret; \\\n"
6297"})\n"
6298"#endif\n"
6299"\n"
6300"#ifdef __LITTLE_ENDIAN__\n"
6301"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6302" float16_t __s0 = __p0; \\\n"
6303" int16_t __ret; \\\n"
6304" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6305" __ret; \\\n"
6306"})\n"
6307"#else\n"
6308"#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n"
6309" float16_t __s0 = __p0; \\\n"
6310" int16_t __ret; \\\n"
6311" __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n"
6312" __ret; \\\n"
6313"})\n"
6314"#endif\n"
6315"\n"
6316"#ifdef __LITTLE_ENDIAN__\n"
6317"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6318" float16_t __s0 = __p0; \\\n"
6319" int32_t __ret; \\\n"
6320" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6321" __ret; \\\n"
6322"})\n"
6323"#else\n"
6324"#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n"
6325" float16_t __s0 = __p0; \\\n"
6326" int32_t __ret; \\\n"
6327" __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n"
6328" __ret; \\\n"
6329"})\n"
6330"#endif\n"
6331"\n"
6332"#ifdef __LITTLE_ENDIAN__\n"
6333"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6334" float16_t __s0 = __p0; \\\n"
6335" int64_t __ret; \\\n"
6336" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6337" __ret; \\\n"
6338"})\n"
6339"#else\n"
6340"#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n"
6341" float16_t __s0 = __p0; \\\n"
6342" int64_t __ret; \\\n"
6343" __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n"
6344" __ret; \\\n"
6345"})\n"
6346"#endif\n"
6347"\n"
6348"#ifdef __LITTLE_ENDIAN__\n"
6349"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6350" float16_t __s0 = __p0; \\\n"
6351" uint16_t __ret; \\\n"
6352" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6353" __ret; \\\n"
6354"})\n"
6355"#else\n"
6356"#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n"
6357" float16_t __s0 = __p0; \\\n"
6358" uint16_t __ret; \\\n"
6359" __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n"
6360" __ret; \\\n"
6361"})\n"
6362"#endif\n"
6363"\n"
6364"#ifdef __LITTLE_ENDIAN__\n"
6365"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6366" float16_t __s0 = __p0; \\\n"
6367" uint32_t __ret; \\\n"
6368" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6369" __ret; \\\n"
6370"})\n"
6371"#else\n"
6372"#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n"
6373" float16_t __s0 = __p0; \\\n"
6374" uint32_t __ret; \\\n"
6375" __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n"
6376" __ret; \\\n"
6377"})\n"
6378"#endif\n"
6379"\n"
6380"#ifdef __LITTLE_ENDIAN__\n"
6381"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6382" float16_t __s0 = __p0; \\\n"
6383" uint64_t __ret; \\\n"
6384" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6385" __ret; \\\n"
6386"})\n"
6387"#else\n"
6388"#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n"
6389" float16_t __s0 = __p0; \\\n"
6390" uint64_t __ret; \\\n"
6391" __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n"
6392" __ret; \\\n"
6393"})\n"
6394"#endif\n"
6395"\n"
6396"#ifdef __LITTLE_ENDIAN__\n"
6397"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6398" float16_t __s0 = __p0; \\\n"
6399" int16_t __ret; \\\n"
6400" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6401" __ret; \\\n"
6402"})\n"
6403"#else\n"
6404"#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n"
6405" float16_t __s0 = __p0; \\\n"
6406" int16_t __ret; \\\n"
6407" __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n"
6408" __ret; \\\n"
6409"})\n"
6410"#endif\n"
6411"\n"
6412"#ifdef __LITTLE_ENDIAN__\n"
6413"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6414" float16_t __s0 = __p0; \\\n"
6415" int32_t __ret; \\\n"
6416" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6417" __ret; \\\n"
6418"})\n"
6419"#else\n"
6420"#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n"
6421" float16_t __s0 = __p0; \\\n"
6422" int32_t __ret; \\\n"
6423" __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n"
6424" __ret; \\\n"
6425"})\n"
6426"#endif\n"
6427"\n"
6428"#ifdef __LITTLE_ENDIAN__\n"
6429"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6430" float16_t __s0 = __p0; \\\n"
6431" int64_t __ret; \\\n"
6432" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6433" __ret; \\\n"
6434"})\n"
6435"#else\n"
6436"#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n"
6437" float16_t __s0 = __p0; \\\n"
6438" int64_t __ret; \\\n"
6439" __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n"
6440" __ret; \\\n"
6441"})\n"
6442"#endif\n"
6443"\n"
6444"#ifdef __LITTLE_ENDIAN__\n"
6445"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6446" float16_t __s0 = __p0; \\\n"
6447" uint16_t __ret; \\\n"
6448" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6449" __ret; \\\n"
6450"})\n"
6451"#else\n"
6452"#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n"
6453" float16_t __s0 = __p0; \\\n"
6454" uint16_t __ret; \\\n"
6455" __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n"
6456" __ret; \\\n"
6457"})\n"
6458"#endif\n"
6459"\n"
6460"#ifdef __LITTLE_ENDIAN__\n"
6461"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6462" float16_t __s0 = __p0; \\\n"
6463" uint32_t __ret; \\\n"
6464" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6465" __ret; \\\n"
6466"})\n"
6467"#else\n"
6468"#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n"
6469" float16_t __s0 = __p0; \\\n"
6470" uint32_t __ret; \\\n"
6471" __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n"
6472" __ret; \\\n"
6473"})\n"
6474"#endif\n"
6475"\n"
6476"#ifdef __LITTLE_ENDIAN__\n"
6477"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6478" float16_t __s0 = __p0; \\\n"
6479" uint64_t __ret; \\\n"
6480" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6481" __ret; \\\n"
6482"})\n"
6483"#else\n"
6484"#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n"
6485" float16_t __s0 = __p0; \\\n"
6486" uint64_t __ret; \\\n"
6487" __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n"
6488" __ret; \\\n"
6489"})\n"
6490"#endif\n"
6491"\n"
6492"#ifdef __LITTLE_ENDIAN__\n"
6493"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6494" float16_t __s0 = __p0; \\\n"
6495" int16_t __ret; \\\n"
6496" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6497" __ret; \\\n"
6498"})\n"
6499"#else\n"
6500"#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n"
6501" float16_t __s0 = __p0; \\\n"
6502" int16_t __ret; \\\n"
6503" __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n"
6504" __ret; \\\n"
6505"})\n"
6506"#endif\n"
6507"\n"
6508"#ifdef __LITTLE_ENDIAN__\n"
6509"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6510" float16_t __s0 = __p0; \\\n"
6511" int32_t __ret; \\\n"
6512" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6513" __ret; \\\n"
6514"})\n"
6515"#else\n"
6516"#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n"
6517" float16_t __s0 = __p0; \\\n"
6518" int32_t __ret; \\\n"
6519" __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n"
6520" __ret; \\\n"
6521"})\n"
6522"#endif\n"
6523"\n"
6524"#ifdef __LITTLE_ENDIAN__\n"
6525"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6526" float16_t __s0 = __p0; \\\n"
6527" int64_t __ret; \\\n"
6528" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6529" __ret; \\\n"
6530"})\n"
6531"#else\n"
6532"#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n"
6533" float16_t __s0 = __p0; \\\n"
6534" int64_t __ret; \\\n"
6535" __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n"
6536" __ret; \\\n"
6537"})\n"
6538"#endif\n"
6539"\n"
6540"#ifdef __LITTLE_ENDIAN__\n"
6541"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6542" float16_t __s0 = __p0; \\\n"
6543" uint16_t __ret; \\\n"
6544" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6545" __ret; \\\n"
6546"})\n"
6547"#else\n"
6548"#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n"
6549" float16_t __s0 = __p0; \\\n"
6550" uint16_t __ret; \\\n"
6551" __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n"
6552" __ret; \\\n"
6553"})\n"
6554"#endif\n"
6555"\n"
6556"#ifdef __LITTLE_ENDIAN__\n"
6557"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6558" float16_t __s0 = __p0; \\\n"
6559" uint32_t __ret; \\\n"
6560" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6561" __ret; \\\n"
6562"})\n"
6563"#else\n"
6564"#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n"
6565" float16_t __s0 = __p0; \\\n"
6566" uint32_t __ret; \\\n"
6567" __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n"
6568" __ret; \\\n"
6569"})\n"
6570"#endif\n"
6571"\n"
6572"#ifdef __LITTLE_ENDIAN__\n"
6573"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6574" float16_t __s0 = __p0; \\\n"
6575" uint64_t __ret; \\\n"
6576" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6577" __ret; \\\n"
6578"})\n"
6579"#else\n"
6580"#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n"
6581" float16_t __s0 = __p0; \\\n"
6582" uint64_t __ret; \\\n"
6583" __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n"
6584" __ret; \\\n"
6585"})\n"
6586"#endif\n"
6587"\n"
6588"#ifdef __LITTLE_ENDIAN__\n"
6589"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6590" float16_t __s0 = __p0; \\\n"
6591" float16_t __s1 = __p1; \\\n"
6592" float16_t __ret; \\\n"
6593" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6594" __ret; \\\n"
6595"})\n"
6596"#else\n"
6597"#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n"
6598" float16_t __s0 = __p0; \\\n"
6599" float16_t __s1 = __p1; \\\n"
6600" float16_t __ret; \\\n"
6601" __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n"
6602" __ret; \\\n"
6603"})\n"
6604"#endif\n"
6605"\n"
6606"#ifdef __LITTLE_ENDIAN__\n"
6607"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6608" float16_t __s0 = __p0; \\\n"
6609" float16_t __s1 = __p1; \\\n"
6610" float16_t __s2 = __p2; \\\n"
6611" float16_t __ret; \\\n"
6612" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6613" __ret; \\\n"
6614"})\n"
6615"#else\n"
6616"#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6617" float16_t __s0 = __p0; \\\n"
6618" float16_t __s1 = __p1; \\\n"
6619" float16_t __s2 = __p2; \\\n"
6620" float16_t __ret; \\\n"
6621" __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n"
6622" __ret; \\\n"
6623"})\n"
6624"#endif\n"
6625"\n"
6626"#ifdef __LITTLE_ENDIAN__\n"
6627"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6628" float16_t __s0 = __p0; \\\n"
6629" float16_t __s1 = __p1; \\\n"
6630" float16_t __s2 = __p2; \\\n"
6631" float16_t __ret; \\\n"
6632" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6633" __ret; \\\n"
6634"})\n"
6635"#else\n"
6636"#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n"
6637" float16_t __s0 = __p0; \\\n"
6638" float16_t __s1 = __p1; \\\n"
6639" float16_t __s2 = __p2; \\\n"
6640" float16_t __ret; \\\n"
6641" __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n"
6642" __ret; \\\n"
6643"})\n"
6644"#endif\n"
6645"\n"
6646"#ifdef __LITTLE_ENDIAN__\n"
6647"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6648" float16_t __s0 = __p0; \\\n"
6649" float16_t __s1 = __p1; \\\n"
6650" float16_t __ret; \\\n"
6651" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6652" __ret; \\\n"
6653"})\n"
6654"#else\n"
6655"#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n"
6656" float16_t __s0 = __p0; \\\n"
6657" float16_t __s1 = __p1; \\\n"
6658" float16_t __ret; \\\n"
6659" __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n"
6660" __ret; \\\n"
6661"})\n"
6662"#endif\n"
6663"\n"
6664"#ifdef __LITTLE_ENDIAN__\n"
6665"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6666" float16_t __s0 = __p0; \\\n"
6667" float16_t __s1 = __p1; \\\n"
6668" float16_t __ret; \\\n"
6669" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6670" __ret; \\\n"
6671"})\n"
6672"#else\n"
6673"#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6674" float16_t __s0 = __p0; \\\n"
6675" float16_t __s1 = __p1; \\\n"
6676" float16_t __ret; \\\n"
6677" __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n"
6678" __ret; \\\n"
6679"})\n"
6680"#endif\n"
6681"\n"
6682"#ifdef __LITTLE_ENDIAN__\n"
6683"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6684" float16_t __s0 = __p0; \\\n"
6685" float16_t __s1 = __p1; \\\n"
6686" float16_t __ret; \\\n"
6687" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6688" __ret; \\\n"
6689"})\n"
6690"#else\n"
6691"#define vminh_f16(__p0, __p1) __extension__ ({ \\\n"
6692" float16_t __s0 = __p0; \\\n"
6693" float16_t __s1 = __p1; \\\n"
6694" float16_t __ret; \\\n"
6695" __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n"
6696" __ret; \\\n"
6697"})\n"
6698"#endif\n"
6699"\n"
6700"#ifdef __LITTLE_ENDIAN__\n"
6701"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6702" float16_t __s0 = __p0; \\\n"
6703" float16_t __s1 = __p1; \\\n"
6704" float16_t __ret; \\\n"
6705" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6706" __ret; \\\n"
6707"})\n"
6708"#else\n"
6709"#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n"
6710" float16_t __s0 = __p0; \\\n"
6711" float16_t __s1 = __p1; \\\n"
6712" float16_t __ret; \\\n"
6713" __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n"
6714" __ret; \\\n"
6715"})\n"
6716"#endif\n"
6717"\n"
6718"#ifdef __LITTLE_ENDIAN__\n"
6719"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6720" float16_t __s0 = __p0; \\\n"
6721" float16_t __s1 = __p1; \\\n"
6722" float16_t __ret; \\\n"
6723" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6724" __ret; \\\n"
6725"})\n"
6726"#else\n"
6727"#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n"
6728" float16_t __s0 = __p0; \\\n"
6729" float16_t __s1 = __p1; \\\n"
6730" float16_t __ret; \\\n"
6731" __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n"
6732" __ret; \\\n"
6733"})\n"
6734"#endif\n"
6735"\n"
6736"#ifdef __LITTLE_ENDIAN__\n"
6737"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6738" float16_t __s0 = __p0; \\\n"
6739" float16_t __s1 = __p1; \\\n"
6740" float16_t __ret; \\\n"
6741" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6742" __ret; \\\n"
6743"})\n"
6744"#else\n"
6745"#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n"
6746" float16_t __s0 = __p0; \\\n"
6747" float16_t __s1 = __p1; \\\n"
6748" float16_t __ret; \\\n"
6749" __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n"
6750" __ret; \\\n"
6751"})\n"
6752"#endif\n"
6753"\n"
6754"#ifdef __LITTLE_ENDIAN__\n"
6755"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6756" float16_t __s0 = __p0; \\\n"
6757" float16_t __ret; \\\n"
6758" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6759" __ret; \\\n"
6760"})\n"
6761"#else\n"
6762"#define vnegh_f16(__p0) __extension__ ({ \\\n"
6763" float16_t __s0 = __p0; \\\n"
6764" float16_t __ret; \\\n"
6765" __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n"
6766" __ret; \\\n"
6767"})\n"
6768"#endif\n"
6769"\n"
6770"#ifdef __LITTLE_ENDIAN__\n"
6771"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6772" float16_t __s0 = __p0; \\\n"
6773" float16_t __ret; \\\n"
6774" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6775" __ret; \\\n"
6776"})\n"
6777"#else\n"
6778"#define vrecpeh_f16(__p0) __extension__ ({ \\\n"
6779" float16_t __s0 = __p0; \\\n"
6780" float16_t __ret; \\\n"
6781" __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n"
6782" __ret; \\\n"
6783"})\n"
6784"#endif\n"
6785"\n"
6786"#ifdef __LITTLE_ENDIAN__\n"
6787"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6788" float16_t __s0 = __p0; \\\n"
6789" float16_t __s1 = __p1; \\\n"
6790" float16_t __ret; \\\n"
6791" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6792" __ret; \\\n"
6793"})\n"
6794"#else\n"
6795"#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n"
6796" float16_t __s0 = __p0; \\\n"
6797" float16_t __s1 = __p1; \\\n"
6798" float16_t __ret; \\\n"
6799" __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n"
6800" __ret; \\\n"
6801"})\n"
6802"#endif\n"
6803"\n"
6804"#ifdef __LITTLE_ENDIAN__\n"
6805"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6806" float16_t __s0 = __p0; \\\n"
6807" float16_t __ret; \\\n"
6808" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6809" __ret; \\\n"
6810"})\n"
6811"#else\n"
6812"#define vrecpxh_f16(__p0) __extension__ ({ \\\n"
6813" float16_t __s0 = __p0; \\\n"
6814" float16_t __ret; \\\n"
6815" __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n"
6816" __ret; \\\n"
6817"})\n"
6818"#endif\n"
6819"\n"
6820"#ifdef __LITTLE_ENDIAN__\n"
6821"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6822" float16_t __s0 = __p0; \\\n"
6823" float16_t __ret; \\\n"
6824" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6825" __ret; \\\n"
6826"})\n"
6827"#else\n"
6828"#define vrndh_f16(__p0) __extension__ ({ \\\n"
6829" float16_t __s0 = __p0; \\\n"
6830" float16_t __ret; \\\n"
6831" __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n"
6832" __ret; \\\n"
6833"})\n"
6834"#endif\n"
6835"\n"
6836"#ifdef __LITTLE_ENDIAN__\n"
6837"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6838" float16_t __s0 = __p0; \\\n"
6839" float16_t __ret; \\\n"
6840" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6841" __ret; \\\n"
6842"})\n"
6843"#else\n"
6844"#define vrndah_f16(__p0) __extension__ ({ \\\n"
6845" float16_t __s0 = __p0; \\\n"
6846" float16_t __ret; \\\n"
6847" __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n"
6848" __ret; \\\n"
6849"})\n"
6850"#endif\n"
6851"\n"
6852"#ifdef __LITTLE_ENDIAN__\n"
6853"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6854" float16_t __s0 = __p0; \\\n"
6855" float16_t __ret; \\\n"
6856" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6857" __ret; \\\n"
6858"})\n"
6859"#else\n"
6860"#define vrndih_f16(__p0) __extension__ ({ \\\n"
6861" float16_t __s0 = __p0; \\\n"
6862" float16_t __ret; \\\n"
6863" __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n"
6864" __ret; \\\n"
6865"})\n"
6866"#endif\n"
6867"\n"
6868"#ifdef __LITTLE_ENDIAN__\n"
6869"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6870" float16_t __s0 = __p0; \\\n"
6871" float16_t __ret; \\\n"
6872" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6873" __ret; \\\n"
6874"})\n"
6875"#else\n"
6876"#define vrndmh_f16(__p0) __extension__ ({ \\\n"
6877" float16_t __s0 = __p0; \\\n"
6878" float16_t __ret; \\\n"
6879" __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n"
6880" __ret; \\\n"
6881"})\n"
6882"#endif\n"
6883"\n"
6884"#ifdef __LITTLE_ENDIAN__\n"
6885"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6886" float16_t __s0 = __p0; \\\n"
6887" float16_t __ret; \\\n"
6888" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6889" __ret; \\\n"
6890"})\n"
6891"#else\n"
6892"#define vrndnh_f16(__p0) __extension__ ({ \\\n"
6893" float16_t __s0 = __p0; \\\n"
6894" float16_t __ret; \\\n"
6895" __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n"
6896" __ret; \\\n"
6897"})\n"
6898"#endif\n"
6899"\n"
6900"#ifdef __LITTLE_ENDIAN__\n"
6901"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6902" float16_t __s0 = __p0; \\\n"
6903" float16_t __ret; \\\n"
6904" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6905" __ret; \\\n"
6906"})\n"
6907"#else\n"
6908"#define vrndph_f16(__p0) __extension__ ({ \\\n"
6909" float16_t __s0 = __p0; \\\n"
6910" float16_t __ret; \\\n"
6911" __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n"
6912" __ret; \\\n"
6913"})\n"
6914"#endif\n"
6915"\n"
6916"#ifdef __LITTLE_ENDIAN__\n"
6917"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6918" float16_t __s0 = __p0; \\\n"
6919" float16_t __ret; \\\n"
6920" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6921" __ret; \\\n"
6922"})\n"
6923"#else\n"
6924"#define vrndxh_f16(__p0) __extension__ ({ \\\n"
6925" float16_t __s0 = __p0; \\\n"
6926" float16_t __ret; \\\n"
6927" __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n"
6928" __ret; \\\n"
6929"})\n"
6930"#endif\n"
6931"\n"
6932"#ifdef __LITTLE_ENDIAN__\n"
6933"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6934" float16_t __s0 = __p0; \\\n"
6935" float16_t __ret; \\\n"
6936" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6937" __ret; \\\n"
6938"})\n"
6939"#else\n"
6940"#define vrsqrteh_f16(__p0) __extension__ ({ \\\n"
6941" float16_t __s0 = __p0; \\\n"
6942" float16_t __ret; \\\n"
6943" __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n"
6944" __ret; \\\n"
6945"})\n"
6946"#endif\n"
6947"\n"
6948"#ifdef __LITTLE_ENDIAN__\n"
6949"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6950" float16_t __s0 = __p0; \\\n"
6951" float16_t __s1 = __p1; \\\n"
6952" float16_t __ret; \\\n"
6953" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6954" __ret; \\\n"
6955"})\n"
6956"#else\n"
6957"#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n"
6958" float16_t __s0 = __p0; \\\n"
6959" float16_t __s1 = __p1; \\\n"
6960" float16_t __ret; \\\n"
6961" __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n"
6962" __ret; \\\n"
6963"})\n"
6964"#endif\n"
6965"\n"
6966"#ifdef __LITTLE_ENDIAN__\n"
6967"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6968" float16_t __s0 = __p0; \\\n"
6969" float16_t __ret; \\\n"
6970" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6971" __ret; \\\n"
6972"})\n"
6973"#else\n"
6974"#define vsqrth_f16(__p0) __extension__ ({ \\\n"
6975" float16_t __s0 = __p0; \\\n"
6976" float16_t __ret; \\\n"
6977" __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n"
6978" __ret; \\\n"
6979"})\n"
6980"#endif\n"
6981"\n"
6982"#ifdef __LITTLE_ENDIAN__\n"
6983"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6984" float16_t __s0 = __p0; \\\n"
6985" float16_t __s1 = __p1; \\\n"
6986" float16_t __ret; \\\n"
6987" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
6988" __ret; \\\n"
6989"})\n"
6990"#else\n"
6991"#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n"
6992" float16_t __s0 = __p0; \\\n"
6993" float16_t __s1 = __p1; \\\n"
6994" float16_t __ret; \\\n"
6995" __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n"
6996" __ret; \\\n"
6997"})\n"
6998"#endif\n"
6999"\n"
7000"#endif\n"
7001"\n"
7002"#undef __ai\n"
7003"\n"
7004"#endif /* __ARM_FP16_H */\n"
7005"" } ,
7006 { "/builtins/armintr.h" , "/*===---- armintr.h - ARM Windows intrinsics -------------------------------===\n"
7007" *\n"
7008" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7009" * of this software and associated documentation files (the \"Software\"), to deal\n"
7010" * in the Software without restriction, including without limitation the rights\n"
7011" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7012" * copies of the Software, and to permit persons to whom the Software is\n"
7013" * furnished to do so, subject to the following conditions:\n"
7014" *\n"
7015" * The above copyright notice and this permission notice shall be included in\n"
7016" * all copies or substantial portions of the Software.\n"
7017" *\n"
7018" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7019" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7020" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7021" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7022" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7023" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7024" * THE SOFTWARE.\n"
7025" *\n"
7026" *===-----------------------------------------------------------------------===\n"
7027" */\n"
7028"\n"
7029"/* Only include this if we're compiling for the windows platform. */\n"
7030"#ifndef _MSC_VER\n"
7031"#include_next <armintr.h>\n"
7032"#else\n"
7033"\n"
7034"#ifndef __ARMINTR_H\n"
7035"#define __ARMINTR_H\n"
7036"\n"
7037"typedef enum\n"
7038"{\n"
7039" _ARM_BARRIER_SY = 0xF,\n"
7040" _ARM_BARRIER_ST = 0xE,\n"
7041" _ARM_BARRIER_ISH = 0xB,\n"
7042" _ARM_BARRIER_ISHST = 0xA,\n"
7043" _ARM_BARRIER_NSH = 0x7,\n"
7044" _ARM_BARRIER_NSHST = 0x6,\n"
7045" _ARM_BARRIER_OSH = 0x3,\n"
7046" _ARM_BARRIER_OSHST = 0x2\n"
7047"} _ARMINTR_BARRIER_TYPE;\n"
7048"\n"
7049"#endif /* __ARMINTR_H */\n"
7050"#endif /* _MSC_VER */\n"
7051"" } ,
7052 { "/builtins/avx2intrin.h" , "/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===\n"
7053" *\n"
7054" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
7055" * of this software and associated documentation files (the \"Software\"), to deal\n"
7056" * in the Software without restriction, including without limitation the rights\n"
7057" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
7058" * copies of the Software, and to permit persons to whom the Software is\n"
7059" * furnished to do so, subject to the following conditions:\n"
7060" *\n"
7061" * The above copyright notice and this permission notice shall be included in\n"
7062" * all copies or substantial portions of the Software.\n"
7063" *\n"
7064" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
7065" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
7066" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
7067" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
7068" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
7069" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
7070" * THE SOFTWARE.\n"
7071" *\n"
7072" *===-----------------------------------------------------------------------===\n"
7073" */\n"
7074"\n"
7075"#ifndef __IMMINTRIN_H\n"
7076"#error \"Never use <avx2intrin.h> directly; include <immintrin.h> instead.\"\n"
7077"#endif\n"
7078"\n"
7079"#ifndef __AVX2INTRIN_H\n"
7080"#define __AVX2INTRIN_H\n"
7081"\n"
7082"/* Define the default attributes for the functions in this file. */\n"
7083"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(256)))\n"
7084"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(128)))\n"
7085"\n"
7086"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
7087"#define _mm256_mpsadbw_epu8(X, Y, M) \\\n"
7088" (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \\\n"
7089" (__v32qi)(__m256i)(Y), (int)(M))\n"
7090"\n"
7091"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7092"_mm256_abs_epi8(__m256i __a)\n"
7093"{\n"
7094" return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);\n"
7095"}\n"
7096"\n"
7097"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7098"_mm256_abs_epi16(__m256i __a)\n"
7099"{\n"
7100" return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);\n"
7101"}\n"
7102"\n"
7103"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7104"_mm256_abs_epi32(__m256i __a)\n"
7105"{\n"
7106" return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);\n"
7107"}\n"
7108"\n"
7109"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7110"_mm256_packs_epi16(__m256i __a, __m256i __b)\n"
7111"{\n"
7112" return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);\n"
7113"}\n"
7114"\n"
7115"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7116"_mm256_packs_epi32(__m256i __a, __m256i __b)\n"
7117"{\n"
7118" return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);\n"
7119"}\n"
7120"\n"
7121"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7122"_mm256_packus_epi16(__m256i __a, __m256i __b)\n"
7123"{\n"
7124" return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);\n"
7125"}\n"
7126"\n"
7127"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7128"_mm256_packus_epi32(__m256i __V1, __m256i __V2)\n"
7129"{\n"
7130" return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);\n"
7131"}\n"
7132"\n"
7133"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7134"_mm256_add_epi8(__m256i __a, __m256i __b)\n"
7135"{\n"
7136" return (__m256i)((__v32qu)__a + (__v32qu)__b);\n"
7137"}\n"
7138"\n"
7139"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7140"_mm256_add_epi16(__m256i __a, __m256i __b)\n"
7141"{\n"
7142" return (__m256i)((__v16hu)__a + (__v16hu)__b);\n"
7143"}\n"
7144"\n"
7145"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7146"_mm256_add_epi32(__m256i __a, __m256i __b)\n"
7147"{\n"
7148" return (__m256i)((__v8su)__a + (__v8su)__b);\n"
7149"}\n"
7150"\n"
7151"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7152"_mm256_add_epi64(__m256i __a, __m256i __b)\n"
7153"{\n"
7154" return (__m256i)((__v4du)__a + (__v4du)__b);\n"
7155"}\n"
7156"\n"
7157"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7158"_mm256_adds_epi8(__m256i __a, __m256i __b)\n"
7159"{\n"
7160" return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);\n"
7161"}\n"
7162"\n"
7163"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7164"_mm256_adds_epi16(__m256i __a, __m256i __b)\n"
7165"{\n"
7166" return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);\n"
7167"}\n"
7168"\n"
7169"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7170"_mm256_adds_epu8(__m256i __a, __m256i __b)\n"
7171"{\n"
7172" return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);\n"
7173"}\n"
7174"\n"
7175"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7176"_mm256_adds_epu16(__m256i __a, __m256i __b)\n"
7177"{\n"
7178" return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);\n"
7179"}\n"
7180"\n"
7181"#define _mm256_alignr_epi8(a, b, n) \\\n"
7182" (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \\\n"
7183" (__v32qi)(__m256i)(b), (n))\n"
7184"\n"
7185"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7186"_mm256_and_si256(__m256i __a, __m256i __b)\n"
7187"{\n"
7188" return (__m256i)((__v4du)__a & (__v4du)__b);\n"
7189"}\n"
7190"\n"
7191"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7192"_mm256_andnot_si256(__m256i __a, __m256i __b)\n"
7193"{\n"
7194" return (__m256i)(~(__v4du)__a & (__v4du)__b);\n"
7195"}\n"
7196"\n"
7197"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7198"_mm256_avg_epu8(__m256i __a, __m256i __b)\n"
7199"{\n"
7200" typedef unsigned short __v32hu __attribute__((__vector_size__(64)));\n"
7201" return (__m256i)__builtin_convertvector(\n"
7202" ((__builtin_convertvector((__v32qu)__a, __v32hu) +\n"
7203" __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)\n"
7204" >> 1, __v32qu);\n"
7205"}\n"
7206"\n"
7207"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7208"_mm256_avg_epu16(__m256i __a, __m256i __b)\n"
7209"{\n"
7210" typedef unsigned int __v16su __attribute__((__vector_size__(64)));\n"
7211" return (__m256i)__builtin_convertvector(\n"
7212" ((__builtin_convertvector((__v16hu)__a, __v16su) +\n"
7213" __builtin_convertvector((__v16hu)__b, __v16su)) + 1)\n"
7214" >> 1, __v16hu);\n"
7215"}\n"
7216"\n"
7217"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7218"_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)\n"
7219"{\n"
7220" return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,\n"
7221" (__v32qi)__M);\n"
7222"}\n"
7223"\n"
7224"#define _mm256_blend_epi16(V1, V2, M) \\\n"
7225" (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \\\n"
7226" (__v16hi)(__m256i)(V2), (int)(M))\n"
7227"\n"
7228"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7229"_mm256_cmpeq_epi8(__m256i __a, __m256i __b)\n"
7230"{\n"
7231" return (__m256i)((__v32qi)__a == (__v32qi)__b);\n"
7232"}\n"
7233"\n"
7234"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7235"_mm256_cmpeq_epi16(__m256i __a, __m256i __b)\n"
7236"{\n"
7237" return (__m256i)((__v16hi)__a == (__v16hi)__b);\n"
7238"}\n"
7239"\n"
7240"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7241"_mm256_cmpeq_epi32(__m256i __a, __m256i __b)\n"
7242"{\n"
7243" return (__m256i)((__v8si)__a == (__v8si)__b);\n"
7244"}\n"
7245"\n"
7246"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7247"_mm256_cmpeq_epi64(__m256i __a, __m256i __b)\n"
7248"{\n"
7249" return (__m256i)((__v4di)__a == (__v4di)__b);\n"
7250"}\n"
7251"\n"
7252"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7253"_mm256_cmpgt_epi8(__m256i __a, __m256i __b)\n"
7254"{\n"
7255" /* This function always performs a signed comparison, but __v32qi is a char\n"
7256" which may be signed or unsigned, so use __v32qs. */\n"
7257" return (__m256i)((__v32qs)__a > (__v32qs)__b);\n"
7258"}\n"
7259"\n"
7260"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7261"_mm256_cmpgt_epi16(__m256i __a, __m256i __b)\n"
7262"{\n"
7263" return (__m256i)((__v16hi)__a > (__v16hi)__b);\n"
7264"}\n"
7265"\n"
7266"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7267"_mm256_cmpgt_epi32(__m256i __a, __m256i __b)\n"
7268"{\n"
7269" return (__m256i)((__v8si)__a > (__v8si)__b);\n"
7270"}\n"
7271"\n"
7272"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7273"_mm256_cmpgt_epi64(__m256i __a, __m256i __b)\n"
7274"{\n"
7275" return (__m256i)((__v4di)__a > (__v4di)__b);\n"
7276"}\n"
7277"\n"
7278"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7279"_mm256_hadd_epi16(__m256i __a, __m256i __b)\n"
7280"{\n"
7281" return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);\n"
7282"}\n"
7283"\n"
7284"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7285"_mm256_hadd_epi32(__m256i __a, __m256i __b)\n"
7286"{\n"
7287" return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);\n"
7288"}\n"
7289"\n"
7290"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7291"_mm256_hadds_epi16(__m256i __a, __m256i __b)\n"
7292"{\n"
7293" return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);\n"
7294"}\n"
7295"\n"
7296"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7297"_mm256_hsub_epi16(__m256i __a, __m256i __b)\n"
7298"{\n"
7299" return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);\n"
7300"}\n"
7301"\n"
7302"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7303"_mm256_hsub_epi32(__m256i __a, __m256i __b)\n"
7304"{\n"
7305" return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);\n"
7306"}\n"
7307"\n"
7308"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7309"_mm256_hsubs_epi16(__m256i __a, __m256i __b)\n"
7310"{\n"
7311" return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);\n"
7312"}\n"
7313"\n"
7314"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7315"_mm256_maddubs_epi16(__m256i __a, __m256i __b)\n"
7316"{\n"
7317" return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);\n"
7318"}\n"
7319"\n"
7320"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7321"_mm256_madd_epi16(__m256i __a, __m256i __b)\n"
7322"{\n"
7323" return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);\n"
7324"}\n"
7325"\n"
7326"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7327"_mm256_max_epi8(__m256i __a, __m256i __b)\n"
7328"{\n"
7329" return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);\n"
7330"}\n"
7331"\n"
7332"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7333"_mm256_max_epi16(__m256i __a, __m256i __b)\n"
7334"{\n"
7335" return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);\n"
7336"}\n"
7337"\n"
7338"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7339"_mm256_max_epi32(__m256i __a, __m256i __b)\n"
7340"{\n"
7341" return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);\n"
7342"}\n"
7343"\n"
7344"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7345"_mm256_max_epu8(__m256i __a, __m256i __b)\n"
7346"{\n"
7347" return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);\n"
7348"}\n"
7349"\n"
7350"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7351"_mm256_max_epu16(__m256i __a, __m256i __b)\n"
7352"{\n"
7353" return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);\n"
7354"}\n"
7355"\n"
7356"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7357"_mm256_max_epu32(__m256i __a, __m256i __b)\n"
7358"{\n"
7359" return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);\n"
7360"}\n"
7361"\n"
7362"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7363"_mm256_min_epi8(__m256i __a, __m256i __b)\n"
7364"{\n"
7365" return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);\n"
7366"}\n"
7367"\n"
7368"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7369"_mm256_min_epi16(__m256i __a, __m256i __b)\n"
7370"{\n"
7371" return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);\n"
7372"}\n"
7373"\n"
7374"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7375"_mm256_min_epi32(__m256i __a, __m256i __b)\n"
7376"{\n"
7377" return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);\n"
7378"}\n"
7379"\n"
7380"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7381"_mm256_min_epu8(__m256i __a, __m256i __b)\n"
7382"{\n"
7383" return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);\n"
7384"}\n"
7385"\n"
7386"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7387"_mm256_min_epu16(__m256i __a, __m256i __b)\n"
7388"{\n"
7389" return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);\n"
7390"}\n"
7391"\n"
7392"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7393"_mm256_min_epu32(__m256i __a, __m256i __b)\n"
7394"{\n"
7395" return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);\n"
7396"}\n"
7397"\n"
7398"static __inline__ int __DEFAULT_FN_ATTRS256\n"
7399"_mm256_movemask_epi8(__m256i __a)\n"
7400"{\n"
7401" return __builtin_ia32_pmovmskb256((__v32qi)__a);\n"
7402"}\n"
7403"\n"
7404"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7405"_mm256_cvtepi8_epi16(__m128i __V)\n"
7406"{\n"
7407" /* This function always performs a signed extension, but __v16qi is a char\n"
7408" which may be signed or unsigned, so use __v16qs. */\n"
7409" return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);\n"
7410"}\n"
7411"\n"
7412"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7413"_mm256_cvtepi8_epi32(__m128i __V)\n"
7414"{\n"
7415" /* This function always performs a signed extension, but __v16qi is a char\n"
7416" which may be signed or unsigned, so use __v16qs. */\n"
7417" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7418"}\n"
7419"\n"
7420"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7421"_mm256_cvtepi8_epi64(__m128i __V)\n"
7422"{\n"
7423" /* This function always performs a signed extension, but __v16qi is a char\n"
7424" which may be signed or unsigned, so use __v16qs. */\n"
7425" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);\n"
7426"}\n"
7427"\n"
7428"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7429"_mm256_cvtepi16_epi32(__m128i __V)\n"
7430"{\n"
7431" return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);\n"
7432"}\n"
7433"\n"
7434"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7435"_mm256_cvtepi16_epi64(__m128i __V)\n"
7436"{\n"
7437" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);\n"
7438"}\n"
7439"\n"
7440"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7441"_mm256_cvtepi32_epi64(__m128i __V)\n"
7442"{\n"
7443" return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);\n"
7444"}\n"
7445"\n"
7446"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7447"_mm256_cvtepu8_epi16(__m128i __V)\n"
7448"{\n"
7449" return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);\n"
7450"}\n"
7451"\n"
7452"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7453"_mm256_cvtepu8_epi32(__m128i __V)\n"
7454"{\n"
7455" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n"
7456"}\n"
7457"\n"
7458"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7459"_mm256_cvtepu8_epi64(__m128i __V)\n"
7460"{\n"
7461" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);\n"
7462"}\n"
7463"\n"
7464"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7465"_mm256_cvtepu16_epi32(__m128i __V)\n"
7466"{\n"
7467" return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);\n"
7468"}\n"
7469"\n"
7470"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7471"_mm256_cvtepu16_epi64(__m128i __V)\n"
7472"{\n"
7473" return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);\n"
7474"}\n"
7475"\n"
7476"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7477"_mm256_cvtepu32_epi64(__m128i __V)\n"
7478"{\n"
7479" return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);\n"
7480"}\n"
7481"\n"
7482"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7483"_mm256_mul_epi32(__m256i __a, __m256i __b)\n"
7484"{\n"
7485" return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);\n"
7486"}\n"
7487"\n"
7488"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7489"_mm256_mulhrs_epi16(__m256i __a, __m256i __b)\n"
7490"{\n"
7491" return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);\n"
7492"}\n"
7493"\n"
7494"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7495"_mm256_mulhi_epu16(__m256i __a, __m256i __b)\n"
7496"{\n"
7497" return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);\n"
7498"}\n"
7499"\n"
7500"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7501"_mm256_mulhi_epi16(__m256i __a, __m256i __b)\n"
7502"{\n"
7503" return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);\n"
7504"}\n"
7505"\n"
7506"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7507"_mm256_mullo_epi16(__m256i __a, __m256i __b)\n"
7508"{\n"
7509" return (__m256i)((__v16hu)__a * (__v16hu)__b);\n"
7510"}\n"
7511"\n"
7512"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7513"_mm256_mullo_epi32 (__m256i __a, __m256i __b)\n"
7514"{\n"
7515" return (__m256i)((__v8su)__a * (__v8su)__b);\n"
7516"}\n"
7517"\n"
7518"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7519"_mm256_mul_epu32(__m256i __a, __m256i __b)\n"
7520"{\n"
7521" return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);\n"
7522"}\n"
7523"\n"
7524"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7525"_mm256_or_si256(__m256i __a, __m256i __b)\n"
7526"{\n"
7527" return (__m256i)((__v4du)__a | (__v4du)__b);\n"
7528"}\n"
7529"\n"
7530"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7531"_mm256_sad_epu8(__m256i __a, __m256i __b)\n"
7532"{\n"
7533" return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);\n"
7534"}\n"
7535"\n"
7536"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7537"_mm256_shuffle_epi8(__m256i __a, __m256i __b)\n"
7538"{\n"
7539" return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);\n"
7540"}\n"
7541"\n"
7542"#define _mm256_shuffle_epi32(a, imm) \\\n"
7543" (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))\n"
7544"\n"
7545"#define _mm256_shufflehi_epi16(a, imm) \\\n"
7546" (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7547"\n"
7548"#define _mm256_shufflelo_epi16(a, imm) \\\n"
7549" (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))\n"
7550"\n"
7551"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7552"_mm256_sign_epi8(__m256i __a, __m256i __b)\n"
7553"{\n"
7554" return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);\n"
7555"}\n"
7556"\n"
7557"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7558"_mm256_sign_epi16(__m256i __a, __m256i __b)\n"
7559"{\n"
7560" return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);\n"
7561"}\n"
7562"\n"
7563"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7564"_mm256_sign_epi32(__m256i __a, __m256i __b)\n"
7565"{\n"
7566" return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);\n"
7567"}\n"
7568"\n"
7569"#define _mm256_slli_si256(a, imm) \\\n"
7570" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7571"\n"
7572"#define _mm256_bslli_epi128(a, imm) \\\n"
7573" (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n"
7574"\n"
7575"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7576"_mm256_slli_epi16(__m256i __a, int __count)\n"
7577"{\n"
7578" return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);\n"
7579"}\n"
7580"\n"
7581"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7582"_mm256_sll_epi16(__m256i __a, __m128i __count)\n"
7583"{\n"
7584" return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);\n"
7585"}\n"
7586"\n"
7587"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7588"_mm256_slli_epi32(__m256i __a, int __count)\n"
7589"{\n"
7590" return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);\n"
7591"}\n"
7592"\n"
7593"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7594"_mm256_sll_epi32(__m256i __a, __m128i __count)\n"
7595"{\n"
7596" return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);\n"
7597"}\n"
7598"\n"
7599"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7600"_mm256_slli_epi64(__m256i __a, int __count)\n"
7601"{\n"
7602" return __builtin_ia32_psllqi256((__v4di)__a, __count);\n"
7603"}\n"
7604"\n"
7605"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7606"_mm256_sll_epi64(__m256i __a, __m128i __count)\n"
7607"{\n"
7608" return __builtin_ia32_psllq256((__v4di)__a, __count);\n"
7609"}\n"
7610"\n"
7611"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7612"_mm256_srai_epi16(__m256i __a, int __count)\n"
7613"{\n"
7614" return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);\n"
7615"}\n"
7616"\n"
7617"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7618"_mm256_sra_epi16(__m256i __a, __m128i __count)\n"
7619"{\n"
7620" return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);\n"
7621"}\n"
7622"\n"
7623"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7624"_mm256_srai_epi32(__m256i __a, int __count)\n"
7625"{\n"
7626" return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);\n"
7627"}\n"
7628"\n"
7629"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7630"_mm256_sra_epi32(__m256i __a, __m128i __count)\n"
7631"{\n"
7632" return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);\n"
7633"}\n"
7634"\n"
7635"#define _mm256_srli_si256(a, imm) \\\n"
7636" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7637"\n"
7638"#define _mm256_bsrli_epi128(a, imm) \\\n"
7639" (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n"
7640"\n"
7641"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7642"_mm256_srli_epi16(__m256i __a, int __count)\n"
7643"{\n"
7644" return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);\n"
7645"}\n"
7646"\n"
7647"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7648"_mm256_srl_epi16(__m256i __a, __m128i __count)\n"
7649"{\n"
7650" return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);\n"
7651"}\n"
7652"\n"
7653"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7654"_mm256_srli_epi32(__m256i __a, int __count)\n"
7655"{\n"
7656" return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);\n"
7657"}\n"
7658"\n"
7659"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7660"_mm256_srl_epi32(__m256i __a, __m128i __count)\n"
7661"{\n"
7662" return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);\n"
7663"}\n"
7664"\n"
7665"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7666"_mm256_srli_epi64(__m256i __a, int __count)\n"
7667"{\n"
7668" return __builtin_ia32_psrlqi256((__v4di)__a, __count);\n"
7669"}\n"
7670"\n"
7671"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7672"_mm256_srl_epi64(__m256i __a, __m128i __count)\n"
7673"{\n"
7674" return __builtin_ia32_psrlq256((__v4di)__a, __count);\n"
7675"}\n"
7676"\n"
7677"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7678"_mm256_sub_epi8(__m256i __a, __m256i __b)\n"
7679"{\n"
7680" return (__m256i)((__v32qu)__a - (__v32qu)__b);\n"
7681"}\n"
7682"\n"
7683"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7684"_mm256_sub_epi16(__m256i __a, __m256i __b)\n"
7685"{\n"
7686" return (__m256i)((__v16hu)__a - (__v16hu)__b);\n"
7687"}\n"
7688"\n"
7689"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7690"_mm256_sub_epi32(__m256i __a, __m256i __b)\n"
7691"{\n"
7692" return (__m256i)((__v8su)__a - (__v8su)__b);\n"
7693"}\n"
7694"\n"
7695"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7696"_mm256_sub_epi64(__m256i __a, __m256i __b)\n"
7697"{\n"
7698" return (__m256i)((__v4du)__a - (__v4du)__b);\n"
7699"}\n"
7700"\n"
7701"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7702"_mm256_subs_epi8(__m256i __a, __m256i __b)\n"
7703"{\n"
7704" return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);\n"
7705"}\n"
7706"\n"
7707"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7708"_mm256_subs_epi16(__m256i __a, __m256i __b)\n"
7709"{\n"
7710" return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);\n"
7711"}\n"
7712"\n"
7713"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7714"_mm256_subs_epu8(__m256i __a, __m256i __b)\n"
7715"{\n"
7716" return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);\n"
7717"}\n"
7718"\n"
7719"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7720"_mm256_subs_epu16(__m256i __a, __m256i __b)\n"
7721"{\n"
7722" return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);\n"
7723"}\n"
7724"\n"
7725"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7726"_mm256_unpackhi_epi8(__m256i __a, __m256i __b)\n"
7727"{\n"
7728" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);\n"
7729"}\n"
7730"\n"
7731"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7732"_mm256_unpackhi_epi16(__m256i __a, __m256i __b)\n"
7733"{\n"
7734" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
7735"}\n"
7736"\n"
7737"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7738"_mm256_unpackhi_epi32(__m256i __a, __m256i __b)\n"
7739"{\n"
7740" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);\n"
7741"}\n"
7742"\n"
7743"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7744"_mm256_unpackhi_epi64(__m256i __a, __m256i __b)\n"
7745"{\n"
7746" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);\n"
7747"}\n"
7748"\n"
7749"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7750"_mm256_unpacklo_epi8(__m256i __a, __m256i __b)\n"
7751"{\n"
7752" return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);\n"
7753"}\n"
7754"\n"
7755"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7756"_mm256_unpacklo_epi16(__m256i __a, __m256i __b)\n"
7757"{\n"
7758" return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);\n"
7759"}\n"
7760"\n"
7761"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7762"_mm256_unpacklo_epi32(__m256i __a, __m256i __b)\n"
7763"{\n"
7764" return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);\n"
7765"}\n"
7766"\n"
7767"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7768"_mm256_unpacklo_epi64(__m256i __a, __m256i __b)\n"
7769"{\n"
7770" return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);\n"
7771"}\n"
7772"\n"
7773"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7774"_mm256_xor_si256(__m256i __a, __m256i __b)\n"
7775"{\n"
7776" return (__m256i)((__v4du)__a ^ (__v4du)__b);\n"
7777"}\n"
7778"\n"
7779"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7780"_mm256_stream_load_si256(__m256i const *__V)\n"
7781"{\n"
7782" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
7783" return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);\n"
7784"}\n"
7785"\n"
7786"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
7787"_mm_broadcastss_ps(__m128 __X)\n"
7788"{\n"
7789" return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);\n"
7790"}\n"
7791"\n"
7792"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
7793"_mm_broadcastsd_pd(__m128d __a)\n"
7794"{\n"
7795" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
7796"}\n"
7797"\n"
7798"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7799"_mm256_broadcastss_ps(__m128 __X)\n"
7800"{\n"
7801" return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7802"}\n"
7803"\n"
7804"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
7805"_mm256_broadcastsd_pd(__m128d __X)\n"
7806"{\n"
7807" return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);\n"
7808"}\n"
7809"\n"
7810"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7811"_mm256_broadcastsi128_si256(__m128i __X)\n"
7812"{\n"
7813" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);\n"
7814"}\n"
7815"\n"
7816"#define _mm_blend_epi32(V1, V2, M) \\\n"
7817" (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \\\n"
7818" (__v4si)(__m128i)(V2), (int)(M))\n"
7819"\n"
7820"#define _mm256_blend_epi32(V1, V2, M) \\\n"
7821" (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \\\n"
7822" (__v8si)(__m256i)(V2), (int)(M))\n"
7823"\n"
7824"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7825"_mm256_broadcastb_epi8(__m128i __X)\n"
7826"{\n"
7827" return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7828"}\n"
7829"\n"
7830"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7831"_mm256_broadcastw_epi16(__m128i __X)\n"
7832"{\n"
7833" return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7834"}\n"
7835"\n"
7836"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7837"_mm256_broadcastd_epi32(__m128i __X)\n"
7838"{\n"
7839" return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7840"}\n"
7841"\n"
7842"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7843"_mm256_broadcastq_epi64(__m128i __X)\n"
7844"{\n"
7845" return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);\n"
7846"}\n"
7847"\n"
7848"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7849"_mm_broadcastb_epi8(__m128i __X)\n"
7850"{\n"
7851" return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7852"}\n"
7853"\n"
7854"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7855"_mm_broadcastw_epi16(__m128i __X)\n"
7856"{\n"
7857" return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n"
7858"}\n"
7859"\n"
7860"\n"
7861"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7862"_mm_broadcastd_epi32(__m128i __X)\n"
7863"{\n"
7864" return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);\n"
7865"}\n"
7866"\n"
7867"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7868"_mm_broadcastq_epi64(__m128i __X)\n"
7869"{\n"
7870" return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);\n"
7871"}\n"
7872"\n"
7873"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7874"_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)\n"
7875"{\n"
7876" return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);\n"
7877"}\n"
7878"\n"
7879"#define _mm256_permute4x64_pd(V, M) \\\n"
7880" (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))\n"
7881"\n"
7882"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
7883"_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)\n"
7884"{\n"
7885" return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);\n"
7886"}\n"
7887"\n"
7888"#define _mm256_permute4x64_epi64(V, M) \\\n"
7889" (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))\n"
7890"\n"
7891"#define _mm256_permute2x128_si256(V1, V2, M) \\\n"
7892" (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))\n"
7893"\n"
7894"#define _mm256_extracti128_si256(V, M) \\\n"
7895" (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))\n"
7896"\n"
7897"#define _mm256_inserti128_si256(V1, V2, M) \\\n"
7898" (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \\\n"
7899" (__v2di)(__m128i)(V2), (int)(M))\n"
7900"\n"
7901"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7902"_mm256_maskload_epi32(int const *__X, __m256i __M)\n"
7903"{\n"
7904" return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);\n"
7905"}\n"
7906"\n"
7907"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7908"_mm256_maskload_epi64(long long const *__X, __m256i __M)\n"
7909"{\n"
7910" return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);\n"
7911"}\n"
7912"\n"
7913"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7914"_mm_maskload_epi32(int const *__X, __m128i __M)\n"
7915"{\n"
7916" return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);\n"
7917"}\n"
7918"\n"
7919"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7920"_mm_maskload_epi64(long long const *__X, __m128i __M)\n"
7921"{\n"
7922" return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);\n"
7923"}\n"
7924"\n"
7925"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7926"_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)\n"
7927"{\n"
7928" __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);\n"
7929"}\n"
7930"\n"
7931"static __inline__ void __DEFAULT_FN_ATTRS256\n"
7932"_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)\n"
7933"{\n"
7934" __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);\n"
7935"}\n"
7936"\n"
7937"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7938"_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)\n"
7939"{\n"
7940" __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);\n"
7941"}\n"
7942"\n"
7943"static __inline__ void __DEFAULT_FN_ATTRS128\n"
7944"_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)\n"
7945"{\n"
7946" __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);\n"
7947"}\n"
7948"\n"
7949"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7950"_mm256_sllv_epi32(__m256i __X, __m256i __Y)\n"
7951"{\n"
7952" return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);\n"
7953"}\n"
7954"\n"
7955"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7956"_mm_sllv_epi32(__m128i __X, __m128i __Y)\n"
7957"{\n"
7958" return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);\n"
7959"}\n"
7960"\n"
7961"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7962"_mm256_sllv_epi64(__m256i __X, __m256i __Y)\n"
7963"{\n"
7964" return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);\n"
7965"}\n"
7966"\n"
7967"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7968"_mm_sllv_epi64(__m128i __X, __m128i __Y)\n"
7969"{\n"
7970" return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);\n"
7971"}\n"
7972"\n"
7973"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7974"_mm256_srav_epi32(__m256i __X, __m256i __Y)\n"
7975"{\n"
7976" return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);\n"
7977"}\n"
7978"\n"
7979"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7980"_mm_srav_epi32(__m128i __X, __m128i __Y)\n"
7981"{\n"
7982" return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);\n"
7983"}\n"
7984"\n"
7985"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7986"_mm256_srlv_epi32(__m256i __X, __m256i __Y)\n"
7987"{\n"
7988" return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);\n"
7989"}\n"
7990"\n"
7991"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
7992"_mm_srlv_epi32(__m128i __X, __m128i __Y)\n"
7993"{\n"
7994" return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);\n"
7995"}\n"
7996"\n"
7997"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
7998"_mm256_srlv_epi64(__m256i __X, __m256i __Y)\n"
7999"{\n"
8000" return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);\n"
8001"}\n"
8002"\n"
8003"static __inline__ __m128i __DEFAULT_FN_ATTRS128\n"
8004"_mm_srlv_epi64(__m128i __X, __m128i __Y)\n"
8005"{\n"
8006" return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);\n"
8007"}\n"
8008"\n"
8009"#define _mm_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8010" (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \\\n"
8011" (double const *)(m), \\\n"
8012" (__v4si)(__m128i)(i), \\\n"
8013" (__v2df)(__m128d)(mask), (s))\n"
8014"\n"
8015"#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \\\n"
8016" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \\\n"
8017" (double const *)(m), \\\n"
8018" (__v4si)(__m128i)(i), \\\n"
8019" (__v4df)(__m256d)(mask), (s))\n"
8020"\n"
8021"#define _mm_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8022" (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \\\n"
8023" (double const *)(m), \\\n"
8024" (__v2di)(__m128i)(i), \\\n"
8025" (__v2df)(__m128d)(mask), (s))\n"
8026"\n"
8027"#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \\\n"
8028" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \\\n"
8029" (double const *)(m), \\\n"
8030" (__v4di)(__m256i)(i), \\\n"
8031" (__v4df)(__m256d)(mask), (s))\n"
8032"\n"
8033"#define _mm_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8034" (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \\\n"
8035" (float const *)(m), \\\n"
8036" (__v4si)(__m128i)(i), \\\n"
8037" (__v4sf)(__m128)(mask), (s))\n"
8038"\n"
8039"#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \\\n"
8040" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \\\n"
8041" (float const *)(m), \\\n"
8042" (__v8si)(__m256i)(i), \\\n"
8043" (__v8sf)(__m256)(mask), (s))\n"
8044"\n"
8045"#define _mm_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8046" (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \\\n"
8047" (float const *)(m), \\\n"
8048" (__v2di)(__m128i)(i), \\\n"
8049" (__v4sf)(__m128)(mask), (s))\n"
8050"\n"
8051"#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \\\n"
8052" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \\\n"
8053" (float const *)(m), \\\n"
8054" (__v4di)(__m256i)(i), \\\n"
8055" (__v4sf)(__m128)(mask), (s))\n"
8056"\n"
8057"#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8058" (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \\\n"
8059" (int const *)(m), \\\n"
8060" (__v4si)(__m128i)(i), \\\n"
8061" (__v4si)(__m128i)(mask), (s))\n"
8062"\n"
8063"#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \\\n"
8064" (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \\\n"
8065" (int const *)(m), \\\n"
8066" (__v8si)(__m256i)(i), \\\n"
8067" (__v8si)(__m256i)(mask), (s))\n"
8068"\n"
8069"#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8070" (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \\\n"
8071" (int const *)(m), \\\n"
8072" (__v2di)(__m128i)(i), \\\n"
8073" (__v4si)(__m128i)(mask), (s))\n"
8074"\n"
8075"#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \\\n"
8076" (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \\\n"
8077" (int const *)(m), \\\n"
8078" (__v4di)(__m256i)(i), \\\n"
8079" (__v4si)(__m128i)(mask), (s))\n"
8080"\n"
8081"#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8082" (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \\\n"
8083" (long long const *)(m), \\\n"
8084" (__v4si)(__m128i)(i), \\\n"
8085" (__v2di)(__m128i)(mask), (s))\n"
8086"\n"
8087"#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \\\n"
8088" (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \\\n"
8089" (long long const *)(m), \\\n"
8090" (__v4si)(__m128i)(i), \\\n"
8091" (__v4di)(__m256i)(mask), (s))\n"
8092"\n"
8093"#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8094" (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \\\n"
8095" (long long const *)(m), \\\n"
8096" (__v2di)(__m128i)(i), \\\n"
8097" (__v2di)(__m128i)(mask), (s))\n"
8098"\n"
8099"#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \\\n"
8100" (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \\\n"
8101" (long long const *)(m), \\\n"
8102" (__v4di)(__m256i)(i), \\\n"
8103" (__v4di)(__m256i)(mask), (s))\n"
8104"\n"
8105"#define _mm_i32gather_pd(m, i, s) \\\n"
8106" (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \\\n"
8107" (double const *)(m), \\\n"
8108" (__v4si)(__m128i)(i), \\\n"
8109" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8110" _mm_setzero_pd()), \\\n"
8111" (s))\n"
8112"\n"
8113"#define _mm256_i32gather_pd(m, i, s) \\\n"
8114" (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8115" (double const *)(m), \\\n"
8116" (__v4si)(__m128i)(i), \\\n"
8117" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8118" _mm256_setzero_pd(), \\\n"
8119" _CMP_EQ_OQ), \\\n"
8120" (s))\n"
8121"\n"
8122"#define _mm_i64gather_pd(m, i, s) \\\n"
8123" (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \\\n"
8124" (double const *)(m), \\\n"
8125" (__v2di)(__m128i)(i), \\\n"
8126" (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n"
8127" _mm_setzero_pd()), \\\n"
8128" (s))\n"
8129"\n"
8130"#define _mm256_i64gather_pd(m, i, s) \\\n"
8131" (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \\\n"
8132" (double const *)(m), \\\n"
8133" (__v4di)(__m256i)(i), \\\n"
8134" (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n"
8135" _mm256_setzero_pd(), \\\n"
8136" _CMP_EQ_OQ), \\\n"
8137" (s))\n"
8138"\n"
8139"#define _mm_i32gather_ps(m, i, s) \\\n"
8140" (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \\\n"
8141" (float const *)(m), \\\n"
8142" (__v4si)(__m128i)(i), \\\n"
8143" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8144" _mm_setzero_ps()), \\\n"
8145" (s))\n"
8146"\n"
8147"#define _mm256_i32gather_ps(m, i, s) \\\n"
8148" (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \\\n"
8149" (float const *)(m), \\\n"
8150" (__v8si)(__m256i)(i), \\\n"
8151" (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \\\n"
8152" _mm256_setzero_ps(), \\\n"
8153" _CMP_EQ_OQ), \\\n"
8154" (s))\n"
8155"\n"
8156"#define _mm_i64gather_ps(m, i, s) \\\n"
8157" (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \\\n"
8158" (float const *)(m), \\\n"
8159" (__v2di)(__m128i)(i), \\\n"
8160" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8161" _mm_setzero_ps()), \\\n"
8162" (s))\n"
8163"\n"
8164"#define _mm256_i64gather_ps(m, i, s) \\\n"
8165" (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \\\n"
8166" (float const *)(m), \\\n"
8167" (__v4di)(__m256i)(i), \\\n"
8168" (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n"
8169" _mm_setzero_ps()), \\\n"
8170" (s))\n"
8171"\n"
8172"#define _mm_i32gather_epi32(m, i, s) \\\n"
8173" (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \\\n"
8174" (int const *)(m), (__v4si)(__m128i)(i), \\\n"
8175" (__v4si)_mm_set1_epi32(-1), (s))\n"
8176"\n"
8177"#define _mm256_i32gather_epi32(m, i, s) \\\n"
8178" (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \\\n"
8179" (int const *)(m), (__v8si)(__m256i)(i), \\\n"
8180" (__v8si)_mm256_set1_epi32(-1), (s))\n"
8181"\n"
8182"#define _mm_i64gather_epi32(m, i, s) \\\n"
8183" (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \\\n"
8184" (int const *)(m), (__v2di)(__m128i)(i), \\\n"
8185" (__v4si)_mm_set1_epi32(-1), (s))\n"
8186"\n"
8187"#define _mm256_i64gather_epi32(m, i, s) \\\n"
8188" (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \\\n"
8189" (int const *)(m), (__v4di)(__m256i)(i), \\\n"
8190" (__v4si)_mm_set1_epi32(-1), (s))\n"
8191"\n"
8192"#define _mm_i32gather_epi64(m, i, s) \\\n"
8193" (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \\\n"
8194" (long long const *)(m), \\\n"
8195" (__v4si)(__m128i)(i), \\\n"
8196" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8197"\n"
8198"#define _mm256_i32gather_epi64(m, i, s) \\\n"
8199" (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \\\n"
8200" (long long const *)(m), \\\n"
8201" (__v4si)(__m128i)(i), \\\n"
8202" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8203"\n"
8204"#define _mm_i64gather_epi64(m, i, s) \\\n"
8205" (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \\\n"
8206" (long long const *)(m), \\\n"
8207" (__v2di)(__m128i)(i), \\\n"
8208" (__v2di)_mm_set1_epi64x(-1), (s))\n"
8209"\n"
8210"#define _mm256_i64gather_epi64(m, i, s) \\\n"
8211" (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \\\n"
8212" (long long const *)(m), \\\n"
8213" (__v4di)(__m256i)(i), \\\n"
8214" (__v4di)_mm256_set1_epi64x(-1), (s))\n"
8215"\n"
8216"#undef __DEFAULT_FN_ATTRS256\n"
8217"#undef __DEFAULT_FN_ATTRS128\n"
8218"\n"
8219"#endif /* __AVX2INTRIN_H */\n"
8220"" } ,
8221 { "/builtins/avxintrin.h" , "/*===---- avxintrin.h - AVX intrinsics -------------------------------------===\n"
8222" *\n"
8223" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
8224" * of this software and associated documentation files (the \"Software\"), to deal\n"
8225" * in the Software without restriction, including without limitation the rights\n"
8226" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
8227" * copies of the Software, and to permit persons to whom the Software is\n"
8228" * furnished to do so, subject to the following conditions:\n"
8229" *\n"
8230" * The above copyright notice and this permission notice shall be included in\n"
8231" * all copies or substantial portions of the Software.\n"
8232" *\n"
8233" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
8234" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
8235" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
8236" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
8237" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
8238" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
8239" * THE SOFTWARE.\n"
8240" *\n"
8241" *===-----------------------------------------------------------------------===\n"
8242" */\n"
8243"\n"
8244"#ifndef __IMMINTRIN_H\n"
8245"#error \"Never use <avxintrin.h> directly; include <immintrin.h> instead.\"\n"
8246"#endif\n"
8247"\n"
8248"#ifndef __AVXINTRIN_H\n"
8249"#define __AVXINTRIN_H\n"
8250"\n"
8251"typedef double __v4df __attribute__ ((__vector_size__ (32)));\n"
8252"typedef float __v8sf __attribute__ ((__vector_size__ (32)));\n"
8253"typedef long long __v4di __attribute__ ((__vector_size__ (32)));\n"
8254"typedef int __v8si __attribute__ ((__vector_size__ (32)));\n"
8255"typedef short __v16hi __attribute__ ((__vector_size__ (32)));\n"
8256"typedef char __v32qi __attribute__ ((__vector_size__ (32)));\n"
8257"\n"
8258"/* Unsigned types */\n"
8259"typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));\n"
8260"typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
8261"typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
8262"typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));\n"
8263"\n"
8264"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
8265" * appear in the interface though. */\n"
8266"typedef signed char __v32qs __attribute__((__vector_size__(32)));\n"
8267"\n"
8268"typedef float __m256 __attribute__ ((__vector_size__ (32)));\n"
8269"typedef double __m256d __attribute__((__vector_size__(32)));\n"
8270"typedef long long __m256i __attribute__((__vector_size__(32)));\n"
8271"\n"
8272"/* Define the default attributes for the functions in this file. */\n"
8273"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(256)))\n"
8274"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(128)))\n"
8275"\n"
8276"/* Arithmetic */\n"
8277"/// Adds two 256-bit vectors of [4 x double].\n"
8278"///\n"
8279"/// \\headerfile <x86intrin.h>\n"
8280"///\n"
8281"/// This intrinsic corresponds to the <c> VADDPD </c> instruction.\n"
8282"///\n"
8283"/// \\param __a\n"
8284"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8285"/// \\param __b\n"
8286"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8287"/// \\returns A 256-bit vector of [4 x double] containing the sums of both\n"
8288"/// operands.\n"
8289"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8290"_mm256_add_pd(__m256d __a, __m256d __b)\n"
8291"{\n"
8292" return (__m256d)((__v4df)__a+(__v4df)__b);\n"
8293"}\n"
8294"\n"
8295"/// Adds two 256-bit vectors of [8 x float].\n"
8296"///\n"
8297"/// \\headerfile <x86intrin.h>\n"
8298"///\n"
8299"/// This intrinsic corresponds to the <c> VADDPS </c> instruction.\n"
8300"///\n"
8301"/// \\param __a\n"
8302"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8303"/// \\param __b\n"
8304"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8305"/// \\returns A 256-bit vector of [8 x float] containing the sums of both\n"
8306"/// operands.\n"
8307"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8308"_mm256_add_ps(__m256 __a, __m256 __b)\n"
8309"{\n"
8310" return (__m256)((__v8sf)__a+(__v8sf)__b);\n"
8311"}\n"
8312"\n"
8313"/// Subtracts two 256-bit vectors of [4 x double].\n"
8314"///\n"
8315"/// \\headerfile <x86intrin.h>\n"
8316"///\n"
8317"/// This intrinsic corresponds to the <c> VSUBPD </c> instruction.\n"
8318"///\n"
8319"/// \\param __a\n"
8320"/// A 256-bit vector of [4 x double] containing the minuend.\n"
8321"/// \\param __b\n"
8322"/// A 256-bit vector of [4 x double] containing the subtrahend.\n"
8323"/// \\returns A 256-bit vector of [4 x double] containing the differences between\n"
8324"/// both operands.\n"
8325"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8326"_mm256_sub_pd(__m256d __a, __m256d __b)\n"
8327"{\n"
8328" return (__m256d)((__v4df)__a-(__v4df)__b);\n"
8329"}\n"
8330"\n"
8331"/// Subtracts two 256-bit vectors of [8 x float].\n"
8332"///\n"
8333"/// \\headerfile <x86intrin.h>\n"
8334"///\n"
8335"/// This intrinsic corresponds to the <c> VSUBPS </c> instruction.\n"
8336"///\n"
8337"/// \\param __a\n"
8338"/// A 256-bit vector of [8 x float] containing the minuend.\n"
8339"/// \\param __b\n"
8340"/// A 256-bit vector of [8 x float] containing the subtrahend.\n"
8341"/// \\returns A 256-bit vector of [8 x float] containing the differences between\n"
8342"/// both operands.\n"
8343"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8344"_mm256_sub_ps(__m256 __a, __m256 __b)\n"
8345"{\n"
8346" return (__m256)((__v8sf)__a-(__v8sf)__b);\n"
8347"}\n"
8348"\n"
8349"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8350"/// two 256-bit vectors of [4 x double].\n"
8351"///\n"
8352"/// \\headerfile <x86intrin.h>\n"
8353"///\n"
8354"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
8355"///\n"
8356"/// \\param __a\n"
8357"/// A 256-bit vector of [4 x double] containing the left source operand.\n"
8358"/// \\param __b\n"
8359"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8360"/// \\returns A 256-bit vector of [4 x double] containing the alternating sums\n"
8361"/// and differences between both operands.\n"
8362"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8363"_mm256_addsub_pd(__m256d __a, __m256d __b)\n"
8364"{\n"
8365" return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);\n"
8366"}\n"
8367"\n"
8368"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
8369"/// two 256-bit vectors of [8 x float].\n"
8370"///\n"
8371"/// \\headerfile <x86intrin.h>\n"
8372"///\n"
8373"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
8374"///\n"
8375"/// \\param __a\n"
8376"/// A 256-bit vector of [8 x float] containing the left source operand.\n"
8377"/// \\param __b\n"
8378"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8379"/// \\returns A 256-bit vector of [8 x float] containing the alternating sums and\n"
8380"/// differences between both operands.\n"
8381"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8382"_mm256_addsub_ps(__m256 __a, __m256 __b)\n"
8383"{\n"
8384" return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);\n"
8385"}\n"
8386"\n"
8387"/// Divides two 256-bit vectors of [4 x double].\n"
8388"///\n"
8389"/// \\headerfile <x86intrin.h>\n"
8390"///\n"
8391"/// This intrinsic corresponds to the <c> VDIVPD </c> instruction.\n"
8392"///\n"
8393"/// \\param __a\n"
8394"/// A 256-bit vector of [4 x double] containing the dividend.\n"
8395"/// \\param __b\n"
8396"/// A 256-bit vector of [4 x double] containing the divisor.\n"
8397"/// \\returns A 256-bit vector of [4 x double] containing the quotients of both\n"
8398"/// operands.\n"
8399"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8400"_mm256_div_pd(__m256d __a, __m256d __b)\n"
8401"{\n"
8402" return (__m256d)((__v4df)__a/(__v4df)__b);\n"
8403"}\n"
8404"\n"
8405"/// Divides two 256-bit vectors of [8 x float].\n"
8406"///\n"
8407"/// \\headerfile <x86intrin.h>\n"
8408"///\n"
8409"/// This intrinsic corresponds to the <c> VDIVPS </c> instruction.\n"
8410"///\n"
8411"/// \\param __a\n"
8412"/// A 256-bit vector of [8 x float] containing the dividend.\n"
8413"/// \\param __b\n"
8414"/// A 256-bit vector of [8 x float] containing the divisor.\n"
8415"/// \\returns A 256-bit vector of [8 x float] containing the quotients of both\n"
8416"/// operands.\n"
8417"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8418"_mm256_div_ps(__m256 __a, __m256 __b)\n"
8419"{\n"
8420" return (__m256)((__v8sf)__a/(__v8sf)__b);\n"
8421"}\n"
8422"\n"
8423"/// Compares two 256-bit vectors of [4 x double] and returns the greater\n"
8424"/// of each pair of values.\n"
8425"///\n"
8426"/// \\headerfile <x86intrin.h>\n"
8427"///\n"
8428"/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.\n"
8429"///\n"
8430"/// \\param __a\n"
8431"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8432"/// \\param __b\n"
8433"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8434"/// \\returns A 256-bit vector of [4 x double] containing the maximum values\n"
8435"/// between both operands.\n"
8436"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8437"_mm256_max_pd(__m256d __a, __m256d __b)\n"
8438"{\n"
8439" return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);\n"
8440"}\n"
8441"\n"
8442"/// Compares two 256-bit vectors of [8 x float] and returns the greater\n"
8443"/// of each pair of values.\n"
8444"///\n"
8445"/// \\headerfile <x86intrin.h>\n"
8446"///\n"
8447"/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.\n"
8448"///\n"
8449"/// \\param __a\n"
8450"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8451"/// \\param __b\n"
8452"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8453"/// \\returns A 256-bit vector of [8 x float] containing the maximum values\n"
8454"/// between both operands.\n"
8455"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8456"_mm256_max_ps(__m256 __a, __m256 __b)\n"
8457"{\n"
8458" return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);\n"
8459"}\n"
8460"\n"
8461"/// Compares two 256-bit vectors of [4 x double] and returns the lesser\n"
8462"/// of each pair of values.\n"
8463"///\n"
8464"/// \\headerfile <x86intrin.h>\n"
8465"///\n"
8466"/// This intrinsic corresponds to the <c> VMINPD </c> instruction.\n"
8467"///\n"
8468"/// \\param __a\n"
8469"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8470"/// \\param __b\n"
8471"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8472"/// \\returns A 256-bit vector of [4 x double] containing the minimum values\n"
8473"/// between both operands.\n"
8474"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8475"_mm256_min_pd(__m256d __a, __m256d __b)\n"
8476"{\n"
8477" return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);\n"
8478"}\n"
8479"\n"
8480"/// Compares two 256-bit vectors of [8 x float] and returns the lesser\n"
8481"/// of each pair of values.\n"
8482"///\n"
8483"/// \\headerfile <x86intrin.h>\n"
8484"///\n"
8485"/// This intrinsic corresponds to the <c> VMINPS </c> instruction.\n"
8486"///\n"
8487"/// \\param __a\n"
8488"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8489"/// \\param __b\n"
8490"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8491"/// \\returns A 256-bit vector of [8 x float] containing the minimum values\n"
8492"/// between both operands.\n"
8493"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8494"_mm256_min_ps(__m256 __a, __m256 __b)\n"
8495"{\n"
8496" return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);\n"
8497"}\n"
8498"\n"
8499"/// Multiplies two 256-bit vectors of [4 x double].\n"
8500"///\n"
8501"/// \\headerfile <x86intrin.h>\n"
8502"///\n"
8503"/// This intrinsic corresponds to the <c> VMULPD </c> instruction.\n"
8504"///\n"
8505"/// \\param __a\n"
8506"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8507"/// \\param __b\n"
8508"/// A 256-bit vector of [4 x double] containing one of the operands.\n"
8509"/// \\returns A 256-bit vector of [4 x double] containing the products of both\n"
8510"/// operands.\n"
8511"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8512"_mm256_mul_pd(__m256d __a, __m256d __b)\n"
8513"{\n"
8514" return (__m256d)((__v4df)__a * (__v4df)__b);\n"
8515"}\n"
8516"\n"
8517"/// Multiplies two 256-bit vectors of [8 x float].\n"
8518"///\n"
8519"/// \\headerfile <x86intrin.h>\n"
8520"///\n"
8521"/// This intrinsic corresponds to the <c> VMULPS </c> instruction.\n"
8522"///\n"
8523"/// \\param __a\n"
8524"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8525"/// \\param __b\n"
8526"/// A 256-bit vector of [8 x float] containing one of the operands.\n"
8527"/// \\returns A 256-bit vector of [8 x float] containing the products of both\n"
8528"/// operands.\n"
8529"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8530"_mm256_mul_ps(__m256 __a, __m256 __b)\n"
8531"{\n"
8532" return (__m256)((__v8sf)__a * (__v8sf)__b);\n"
8533"}\n"
8534"\n"
8535"/// Calculates the square roots of the values in a 256-bit vector of\n"
8536"/// [4 x double].\n"
8537"///\n"
8538"/// \\headerfile <x86intrin.h>\n"
8539"///\n"
8540"/// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.\n"
8541"///\n"
8542"/// \\param __a\n"
8543"/// A 256-bit vector of [4 x double].\n"
8544"/// \\returns A 256-bit vector of [4 x double] containing the square roots of the\n"
8545"/// values in the operand.\n"
8546"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8547"_mm256_sqrt_pd(__m256d __a)\n"
8548"{\n"
8549" return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);\n"
8550"}\n"
8551"\n"
8552"/// Calculates the square roots of the values in a 256-bit vector of\n"
8553"/// [8 x float].\n"
8554"///\n"
8555"/// \\headerfile <x86intrin.h>\n"
8556"///\n"
8557"/// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.\n"
8558"///\n"
8559"/// \\param __a\n"
8560"/// A 256-bit vector of [8 x float].\n"
8561"/// \\returns A 256-bit vector of [8 x float] containing the square roots of the\n"
8562"/// values in the operand.\n"
8563"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8564"_mm256_sqrt_ps(__m256 __a)\n"
8565"{\n"
8566" return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);\n"
8567"}\n"
8568"\n"
8569"/// Calculates the reciprocal square roots of the values in a 256-bit\n"
8570"/// vector of [8 x float].\n"
8571"///\n"
8572"/// \\headerfile <x86intrin.h>\n"
8573"///\n"
8574"/// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.\n"
8575"///\n"
8576"/// \\param __a\n"
8577"/// A 256-bit vector of [8 x float].\n"
8578"/// \\returns A 256-bit vector of [8 x float] containing the reciprocal square\n"
8579"/// roots of the values in the operand.\n"
8580"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8581"_mm256_rsqrt_ps(__m256 __a)\n"
8582"{\n"
8583" return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);\n"
8584"}\n"
8585"\n"
8586"/// Calculates the reciprocals of the values in a 256-bit vector of\n"
8587"/// [8 x float].\n"
8588"///\n"
8589"/// \\headerfile <x86intrin.h>\n"
8590"///\n"
8591"/// This intrinsic corresponds to the <c> VRCPPS </c> instruction.\n"
8592"///\n"
8593"/// \\param __a\n"
8594"/// A 256-bit vector of [8 x float].\n"
8595"/// \\returns A 256-bit vector of [8 x float] containing the reciprocals of the\n"
8596"/// values in the operand.\n"
8597"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8598"_mm256_rcp_ps(__m256 __a)\n"
8599"{\n"
8600" return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);\n"
8601"}\n"
8602"\n"
8603"/// Rounds the values in a 256-bit vector of [4 x double] as specified\n"
8604"/// by the byte operand. The source values are rounded to integer values and\n"
8605"/// returned as 64-bit double-precision floating-point values.\n"
8606"///\n"
8607"/// \\headerfile <x86intrin.h>\n"
8608"///\n"
8609"/// \\code\n"
8610"/// __m256d _mm256_round_pd(__m256d V, const int M);\n"
8611"/// \\endcode\n"
8612"///\n"
8613"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8614"///\n"
8615"/// \\param V\n"
8616"/// A 256-bit vector of [4 x double].\n"
8617"/// \\param M\n"
8618"/// An integer value that specifies the rounding operation. \\n\n"
8619"/// Bits [7:4] are reserved. \\n\n"
8620"/// Bit [3] is a precision exception value: \\n\n"
8621"/// 0: A normal PE exception is used. \\n\n"
8622"/// 1: The PE field is not updated. \\n\n"
8623"/// Bit [2] is the rounding control source: \\n\n"
8624"/// 0: Use bits [1:0] of \\a M. \\n\n"
8625"/// 1: Use the current MXCSR setting. \\n\n"
8626"/// Bits [1:0] contain the rounding control definition: \\n\n"
8627"/// 00: Nearest. \\n\n"
8628"/// 01: Downward (toward negative infinity). \\n\n"
8629"/// 10: Upward (toward positive infinity). \\n\n"
8630"/// 11: Truncated.\n"
8631"/// \\returns A 256-bit vector of [4 x double] containing the rounded values.\n"
8632"#define _mm256_round_pd(V, M) \\\n"
8633" (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))\n"
8634"\n"
8635"/// Rounds the values stored in a 256-bit vector of [8 x float] as\n"
8636"/// specified by the byte operand. The source values are rounded to integer\n"
8637"/// values and returned as floating-point values.\n"
8638"///\n"
8639"/// \\headerfile <x86intrin.h>\n"
8640"///\n"
8641"/// \\code\n"
8642"/// __m256 _mm256_round_ps(__m256 V, const int M);\n"
8643"/// \\endcode\n"
8644"///\n"
8645"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8646"///\n"
8647"/// \\param V\n"
8648"/// A 256-bit vector of [8 x float].\n"
8649"/// \\param M\n"
8650"/// An integer value that specifies the rounding operation. \\n\n"
8651"/// Bits [7:4] are reserved. \\n\n"
8652"/// Bit [3] is a precision exception value: \\n\n"
8653"/// 0: A normal PE exception is used. \\n\n"
8654"/// 1: The PE field is not updated. \\n\n"
8655"/// Bit [2] is the rounding control source: \\n\n"
8656"/// 0: Use bits [1:0] of \\a M. \\n\n"
8657"/// 1: Use the current MXCSR setting. \\n\n"
8658"/// Bits [1:0] contain the rounding control definition: \\n\n"
8659"/// 00: Nearest. \\n\n"
8660"/// 01: Downward (toward negative infinity). \\n\n"
8661"/// 10: Upward (toward positive infinity). \\n\n"
8662"/// 11: Truncated.\n"
8663"/// \\returns A 256-bit vector of [8 x float] containing the rounded values.\n"
8664"#define _mm256_round_ps(V, M) \\\n"
8665" (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))\n"
8666"\n"
8667"/// Rounds up the values stored in a 256-bit vector of [4 x double]. The\n"
8668"/// source values are rounded up to integer values and returned as 64-bit\n"
8669"/// double-precision floating-point values.\n"
8670"///\n"
8671"/// \\headerfile <x86intrin.h>\n"
8672"///\n"
8673"/// \\code\n"
8674"/// __m256d _mm256_ceil_pd(__m256d V);\n"
8675"/// \\endcode\n"
8676"///\n"
8677"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8678"///\n"
8679"/// \\param V\n"
8680"/// A 256-bit vector of [4 x double].\n"
8681"/// \\returns A 256-bit vector of [4 x double] containing the rounded up values.\n"
8682"#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)\n"
8683"\n"
8684"/// Rounds down the values stored in a 256-bit vector of [4 x double].\n"
8685"/// The source values are rounded down to integer values and returned as\n"
8686"/// 64-bit double-precision floating-point values.\n"
8687"///\n"
8688"/// \\headerfile <x86intrin.h>\n"
8689"///\n"
8690"/// \\code\n"
8691"/// __m256d _mm256_floor_pd(__m256d V);\n"
8692"/// \\endcode\n"
8693"///\n"
8694"/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n"
8695"///\n"
8696"/// \\param V\n"
8697"/// A 256-bit vector of [4 x double].\n"
8698"/// \\returns A 256-bit vector of [4 x double] containing the rounded down\n"
8699"/// values.\n"
8700"#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)\n"
8701"\n"
8702"/// Rounds up the values stored in a 256-bit vector of [8 x float]. The\n"
8703"/// source values are rounded up to integer values and returned as\n"
8704"/// floating-point values.\n"
8705"///\n"
8706"/// \\headerfile <x86intrin.h>\n"
8707"///\n"
8708"/// \\code\n"
8709"/// __m256 _mm256_ceil_ps(__m256 V);\n"
8710"/// \\endcode\n"
8711"///\n"
8712"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8713"///\n"
8714"/// \\param V\n"
8715"/// A 256-bit vector of [8 x float].\n"
8716"/// \\returns A 256-bit vector of [8 x float] containing the rounded up values.\n"
8717"#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)\n"
8718"\n"
8719"/// Rounds down the values stored in a 256-bit vector of [8 x float]. The\n"
8720"/// source values are rounded down to integer values and returned as\n"
8721"/// floating-point values.\n"
8722"///\n"
8723"/// \\headerfile <x86intrin.h>\n"
8724"///\n"
8725"/// \\code\n"
8726"/// __m256 _mm256_floor_ps(__m256 V);\n"
8727"/// \\endcode\n"
8728"///\n"
8729"/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n"
8730"///\n"
8731"/// \\param V\n"
8732"/// A 256-bit vector of [8 x float].\n"
8733"/// \\returns A 256-bit vector of [8 x float] containing the rounded down values.\n"
8734"#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)\n"
8735"\n"
8736"/* Logical */\n"
8737"/// Performs a bitwise AND of two 256-bit vectors of [4 x double].\n"
8738"///\n"
8739"/// \\headerfile <x86intrin.h>\n"
8740"///\n"
8741"/// This intrinsic corresponds to the <c> VANDPD </c> instruction.\n"
8742"///\n"
8743"/// \\param __a\n"
8744"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8745"/// \\param __b\n"
8746"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8747"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8748"/// values between both operands.\n"
8749"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8750"_mm256_and_pd(__m256d __a, __m256d __b)\n"
8751"{\n"
8752" return (__m256d)((__v4du)__a & (__v4du)__b);\n"
8753"}\n"
8754"\n"
8755"/// Performs a bitwise AND of two 256-bit vectors of [8 x float].\n"
8756"///\n"
8757"/// \\headerfile <x86intrin.h>\n"
8758"///\n"
8759"/// This intrinsic corresponds to the <c> VANDPS </c> instruction.\n"
8760"///\n"
8761"/// \\param __a\n"
8762"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8763"/// \\param __b\n"
8764"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8765"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8766"/// values between both operands.\n"
8767"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8768"_mm256_and_ps(__m256 __a, __m256 __b)\n"
8769"{\n"
8770" return (__m256)((__v8su)__a & (__v8su)__b);\n"
8771"}\n"
8772"\n"
8773"/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using\n"
8774"/// the one's complement of the values contained in the first source operand.\n"
8775"///\n"
8776"/// \\headerfile <x86intrin.h>\n"
8777"///\n"
8778"/// This intrinsic corresponds to the <c> VANDNPD </c> instruction.\n"
8779"///\n"
8780"/// \\param __a\n"
8781"/// A 256-bit vector of [4 x double] containing the left source operand. The\n"
8782"/// one's complement of this value is used in the bitwise AND.\n"
8783"/// \\param __b\n"
8784"/// A 256-bit vector of [4 x double] containing the right source operand.\n"
8785"/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n"
8786"/// values of the second operand and the one's complement of the first\n"
8787"/// operand.\n"
8788"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8789"_mm256_andnot_pd(__m256d __a, __m256d __b)\n"
8790"{\n"
8791" return (__m256d)(~(__v4du)__a & (__v4du)__b);\n"
8792"}\n"
8793"\n"
8794"/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using\n"
8795"/// the one's complement of the values contained in the first source operand.\n"
8796"///\n"
8797"/// \\headerfile <x86intrin.h>\n"
8798"///\n"
8799"/// This intrinsic corresponds to the <c> VANDNPS </c> instruction.\n"
8800"///\n"
8801"/// \\param __a\n"
8802"/// A 256-bit vector of [8 x float] containing the left source operand. The\n"
8803"/// one's complement of this value is used in the bitwise AND.\n"
8804"/// \\param __b\n"
8805"/// A 256-bit vector of [8 x float] containing the right source operand.\n"
8806"/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n"
8807"/// values of the second operand and the one's complement of the first\n"
8808"/// operand.\n"
8809"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8810"_mm256_andnot_ps(__m256 __a, __m256 __b)\n"
8811"{\n"
8812" return (__m256)(~(__v8su)__a & (__v8su)__b);\n"
8813"}\n"
8814"\n"
8815"/// Performs a bitwise OR of two 256-bit vectors of [4 x double].\n"
8816"///\n"
8817"/// \\headerfile <x86intrin.h>\n"
8818"///\n"
8819"/// This intrinsic corresponds to the <c> VORPD </c> instruction.\n"
8820"///\n"
8821"/// \\param __a\n"
8822"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8823"/// \\param __b\n"
8824"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8825"/// \\returns A 256-bit vector of [4 x double] containing the bitwise OR of the\n"
8826"/// values between both operands.\n"
8827"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8828"_mm256_or_pd(__m256d __a, __m256d __b)\n"
8829"{\n"
8830" return (__m256d)((__v4du)__a | (__v4du)__b);\n"
8831"}\n"
8832"\n"
8833"/// Performs a bitwise OR of two 256-bit vectors of [8 x float].\n"
8834"///\n"
8835"/// \\headerfile <x86intrin.h>\n"
8836"///\n"
8837"/// This intrinsic corresponds to the <c> VORPS </c> instruction.\n"
8838"///\n"
8839"/// \\param __a\n"
8840"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8841"/// \\param __b\n"
8842"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8843"/// \\returns A 256-bit vector of [8 x float] containing the bitwise OR of the\n"
8844"/// values between both operands.\n"
8845"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8846"_mm256_or_ps(__m256 __a, __m256 __b)\n"
8847"{\n"
8848" return (__m256)((__v8su)__a | (__v8su)__b);\n"
8849"}\n"
8850"\n"
8851"/// Performs a bitwise XOR of two 256-bit vectors of [4 x double].\n"
8852"///\n"
8853"/// \\headerfile <x86intrin.h>\n"
8854"///\n"
8855"/// This intrinsic corresponds to the <c> VXORPD </c> instruction.\n"
8856"///\n"
8857"/// \\param __a\n"
8858"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8859"/// \\param __b\n"
8860"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8861"/// \\returns A 256-bit vector of [4 x double] containing the bitwise XOR of the\n"
8862"/// values between both operands.\n"
8863"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8864"_mm256_xor_pd(__m256d __a, __m256d __b)\n"
8865"{\n"
8866" return (__m256d)((__v4du)__a ^ (__v4du)__b);\n"
8867"}\n"
8868"\n"
8869"/// Performs a bitwise XOR of two 256-bit vectors of [8 x float].\n"
8870"///\n"
8871"/// \\headerfile <x86intrin.h>\n"
8872"///\n"
8873"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
8874"///\n"
8875"/// \\param __a\n"
8876"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8877"/// \\param __b\n"
8878"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8879"/// \\returns A 256-bit vector of [8 x float] containing the bitwise XOR of the\n"
8880"/// values between both operands.\n"
8881"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8882"_mm256_xor_ps(__m256 __a, __m256 __b)\n"
8883"{\n"
8884" return (__m256)((__v8su)__a ^ (__v8su)__b);\n"
8885"}\n"
8886"\n"
8887"/* Horizontal arithmetic */\n"
8888"/// Horizontally adds the adjacent pairs of values contained in two\n"
8889"/// 256-bit vectors of [4 x double].\n"
8890"///\n"
8891"/// \\headerfile <x86intrin.h>\n"
8892"///\n"
8893"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
8894"///\n"
8895"/// \\param __a\n"
8896"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8897"/// The horizontal sums of the values are returned in the even-indexed\n"
8898"/// elements of a vector of [4 x double].\n"
8899"/// \\param __b\n"
8900"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8901"/// The horizontal sums of the values are returned in the odd-indexed\n"
8902"/// elements of a vector of [4 x double].\n"
8903"/// \\returns A 256-bit vector of [4 x double] containing the horizontal sums of\n"
8904"/// both operands.\n"
8905"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8906"_mm256_hadd_pd(__m256d __a, __m256d __b)\n"
8907"{\n"
8908" return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);\n"
8909"}\n"
8910"\n"
8911"/// Horizontally adds the adjacent pairs of values contained in two\n"
8912"/// 256-bit vectors of [8 x float].\n"
8913"///\n"
8914"/// \\headerfile <x86intrin.h>\n"
8915"///\n"
8916"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
8917"///\n"
8918"/// \\param __a\n"
8919"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8920"/// The horizontal sums of the values are returned in the elements with\n"
8921"/// index 0, 1, 4, 5 of a vector of [8 x float].\n"
8922"/// \\param __b\n"
8923"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8924"/// The horizontal sums of the values are returned in the elements with\n"
8925"/// index 2, 3, 6, 7 of a vector of [8 x float].\n"
8926"/// \\returns A 256-bit vector of [8 x float] containing the horizontal sums of\n"
8927"/// both operands.\n"
8928"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8929"_mm256_hadd_ps(__m256 __a, __m256 __b)\n"
8930"{\n"
8931" return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);\n"
8932"}\n"
8933"\n"
8934"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8935"/// 256-bit vectors of [4 x double].\n"
8936"///\n"
8937"/// \\headerfile <x86intrin.h>\n"
8938"///\n"
8939"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
8940"///\n"
8941"/// \\param __a\n"
8942"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8943"/// The horizontal differences between the values are returned in the\n"
8944"/// even-indexed elements of a vector of [4 x double].\n"
8945"/// \\param __b\n"
8946"/// A 256-bit vector of [4 x double] containing one of the source operands.\n"
8947"/// The horizontal differences between the values are returned in the\n"
8948"/// odd-indexed elements of a vector of [4 x double].\n"
8949"/// \\returns A 256-bit vector of [4 x double] containing the horizontal\n"
8950"/// differences of both operands.\n"
8951"static __inline __m256d __DEFAULT_FN_ATTRS\n"
8952"_mm256_hsub_pd(__m256d __a, __m256d __b)\n"
8953"{\n"
8954" return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);\n"
8955"}\n"
8956"\n"
8957"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
8958"/// 256-bit vectors of [8 x float].\n"
8959"///\n"
8960"/// \\headerfile <x86intrin.h>\n"
8961"///\n"
8962"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
8963"///\n"
8964"/// \\param __a\n"
8965"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8966"/// The horizontal differences between the values are returned in the\n"
8967"/// elements with index 0, 1, 4, 5 of a vector of [8 x float].\n"
8968"/// \\param __b\n"
8969"/// A 256-bit vector of [8 x float] containing one of the source operands.\n"
8970"/// The horizontal differences between the values are returned in the\n"
8971"/// elements with index 2, 3, 6, 7 of a vector of [8 x float].\n"
8972"/// \\returns A 256-bit vector of [8 x float] containing the horizontal\n"
8973"/// differences of both operands.\n"
8974"static __inline __m256 __DEFAULT_FN_ATTRS\n"
8975"_mm256_hsub_ps(__m256 __a, __m256 __b)\n"
8976"{\n"
8977" return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);\n"
8978"}\n"
8979"\n"
8980"/* Vector permutations */\n"
8981"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
8982"/// by the 128-bit integer vector operand.\n"
8983"///\n"
8984"/// \\headerfile <x86intrin.h>\n"
8985"///\n"
8986"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
8987"///\n"
8988"/// \\param __a\n"
8989"/// A 128-bit vector of [2 x double].\n"
8990"/// \\param __c\n"
8991"/// A 128-bit integer vector operand specifying how the values are to be\n"
8992"/// copied. \\n\n"
8993"/// Bit [1]: \\n\n"
8994"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
8995"/// vector. \\n\n"
8996"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
8997"/// returned vector. \\n\n"
8998"/// Bit [65]: \\n\n"
8999"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9000"/// returned vector. \\n\n"
9001"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9002"/// returned vector.\n"
9003"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9004"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
9005"_mm_permutevar_pd(__m128d __a, __m128i __c)\n"
9006"{\n"
9007" return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);\n"
9008"}\n"
9009"\n"
9010"/// Copies the values in a 256-bit vector of [4 x double] as specified\n"
9011"/// by the 256-bit integer vector operand.\n"
9012"///\n"
9013"/// \\headerfile <x86intrin.h>\n"
9014"///\n"
9015"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9016"///\n"
9017"/// \\param __a\n"
9018"/// A 256-bit vector of [4 x double].\n"
9019"/// \\param __c\n"
9020"/// A 256-bit integer vector operand specifying how the values are to be\n"
9021"/// copied. \\n\n"
9022"/// Bit [1]: \\n\n"
9023"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9024"/// vector. \\n\n"
9025"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9026"/// returned vector. \\n\n"
9027"/// Bit [65]: \\n\n"
9028"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9029"/// returned vector. \\n\n"
9030"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9031"/// returned vector. \\n\n"
9032"/// Bit [129]: \\n\n"
9033"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9034"/// returned vector. \\n\n"
9035"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9036"/// returned vector. \\n\n"
9037"/// Bit [193]: \\n\n"
9038"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9039"/// returned vector. \\n\n"
9040"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9041"/// returned vector.\n"
9042"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9043"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9044"_mm256_permutevar_pd(__m256d __a, __m256i __c)\n"
9045"{\n"
9046" return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);\n"
9047"}\n"
9048"\n"
9049"/// Copies the values stored in a 128-bit vector of [4 x float] as\n"
9050"/// specified by the 128-bit integer vector operand.\n"
9051"/// \\headerfile <x86intrin.h>\n"
9052"///\n"
9053"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9054"///\n"
9055"/// \\param __a\n"
9056"/// A 128-bit vector of [4 x float].\n"
9057"/// \\param __c\n"
9058"/// A 128-bit integer vector operand specifying how the values are to be\n"
9059"/// copied. \\n\n"
9060"/// Bits [1:0]: \\n\n"
9061"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9062"/// returned vector. \\n\n"
9063"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9064"/// returned vector. \\n\n"
9065"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9066"/// returned vector. \\n\n"
9067"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9068"/// returned vector. \\n\n"
9069"/// Bits [33:32]: \\n\n"
9070"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9071"/// returned vector. \\n\n"
9072"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9073"/// returned vector. \\n\n"
9074"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9075"/// returned vector. \\n\n"
9076"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9077"/// returned vector. \\n\n"
9078"/// Bits [65:64]: \\n\n"
9079"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9080"/// returned vector. \\n\n"
9081"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9082"/// returned vector. \\n\n"
9083"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9084"/// returned vector. \\n\n"
9085"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9086"/// returned vector. \\n\n"
9087"/// Bits [97:96]: \\n\n"
9088"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9089"/// returned vector. \\n\n"
9090"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9091"/// returned vector. \\n\n"
9092"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9093"/// returned vector. \\n\n"
9094"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9095"/// returned vector.\n"
9096"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9097"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
9098"_mm_permutevar_ps(__m128 __a, __m128i __c)\n"
9099"{\n"
9100" return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);\n"
9101"}\n"
9102"\n"
9103"/// Copies the values stored in a 256-bit vector of [8 x float] as\n"
9104"/// specified by the 256-bit integer vector operand.\n"
9105"///\n"
9106"/// \\headerfile <x86intrin.h>\n"
9107"///\n"
9108"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9109"///\n"
9110"/// \\param __a\n"
9111"/// A 256-bit vector of [8 x float].\n"
9112"/// \\param __c\n"
9113"/// A 256-bit integer vector operand specifying how the values are to be\n"
9114"/// copied. \\n\n"
9115"/// Bits [1:0]: \\n\n"
9116"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9117"/// returned vector. \\n\n"
9118"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9119"/// returned vector. \\n\n"
9120"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9121"/// returned vector. \\n\n"
9122"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9123"/// returned vector. \\n\n"
9124"/// Bits [33:32]: \\n\n"
9125"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9126"/// returned vector. \\n\n"
9127"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9128"/// returned vector. \\n\n"
9129"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9130"/// returned vector. \\n\n"
9131"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9132"/// returned vector. \\n\n"
9133"/// Bits [65:64]: \\n\n"
9134"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9135"/// returned vector. \\n\n"
9136"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9137"/// returned vector. \\n\n"
9138"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9139"/// returned vector. \\n\n"
9140"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9141"/// returned vector. \\n\n"
9142"/// Bits [97:96]: \\n\n"
9143"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9144"/// returned vector. \\n\n"
9145"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9146"/// returned vector. \\n\n"
9147"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9148"/// returned vector. \\n\n"
9149"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9150"/// returned vector. \\n\n"
9151"/// Bits [129:128]: \\n\n"
9152"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9153"/// returned vector. \\n\n"
9154"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9155"/// returned vector. \\n\n"
9156"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9157"/// returned vector. \\n\n"
9158"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9159"/// returned vector. \\n\n"
9160"/// Bits [161:160]: \\n\n"
9161"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9162"/// returned vector. \\n\n"
9163"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9164"/// returned vector. \\n\n"
9165"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9166"/// returned vector. \\n\n"
9167"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9168"/// returned vector. \\n\n"
9169"/// Bits [193:192]: \\n\n"
9170"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9171"/// returned vector. \\n\n"
9172"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9173"/// returned vector. \\n\n"
9174"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9175"/// returned vector. \\n\n"
9176"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9177"/// returned vector. \\n\n"
9178"/// Bits [225:224]: \\n\n"
9179"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9180"/// returned vector. \\n\n"
9181"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9182"/// returned vector. \\n\n"
9183"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9184"/// returned vector. \\n\n"
9185"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9186"/// returned vector.\n"
9187"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9188"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9189"_mm256_permutevar_ps(__m256 __a, __m256i __c)\n"
9190"{\n"
9191" return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);\n"
9192"}\n"
9193"\n"
9194"/// Copies the values in a 128-bit vector of [2 x double] as specified\n"
9195"/// by the immediate integer operand.\n"
9196"///\n"
9197"/// \\headerfile <x86intrin.h>\n"
9198"///\n"
9199"/// \\code\n"
9200"/// __m128d _mm_permute_pd(__m128d A, const int C);\n"
9201"/// \\endcode\n"
9202"///\n"
9203"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9204"///\n"
9205"/// \\param A\n"
9206"/// A 128-bit vector of [2 x double].\n"
9207"/// \\param C\n"
9208"/// An immediate integer operand specifying how the values are to be\n"
9209"/// copied. \\n\n"
9210"/// Bit [0]: \\n\n"
9211"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9212"/// vector. \\n\n"
9213"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9214"/// returned vector. \\n\n"
9215"/// Bit [1]: \\n\n"
9216"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9217"/// returned vector. \\n\n"
9218"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9219"/// returned vector.\n"
9220"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
9221"#define _mm_permute_pd(A, C) \\\n"
9222" (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))\n"
9223"\n"
9224"/// Copies the values in a 256-bit vector of [4 x double] as specified by\n"
9225"/// the immediate integer operand.\n"
9226"///\n"
9227"/// \\headerfile <x86intrin.h>\n"
9228"///\n"
9229"/// \\code\n"
9230"/// __m256d _mm256_permute_pd(__m256d A, const int C);\n"
9231"/// \\endcode\n"
9232"///\n"
9233"/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n"
9234"///\n"
9235"/// \\param A\n"
9236"/// A 256-bit vector of [4 x double].\n"
9237"/// \\param C\n"
9238"/// An immediate integer operand specifying how the values are to be\n"
9239"/// copied. \\n\n"
9240"/// Bit [0]: \\n\n"
9241"/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n"
9242"/// vector. \\n\n"
9243"/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n"
9244"/// returned vector. \\n\n"
9245"/// Bit [1]: \\n\n"
9246"/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n"
9247"/// returned vector. \\n\n"
9248"/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n"
9249"/// returned vector. \\n\n"
9250"/// Bit [2]: \\n\n"
9251"/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n"
9252"/// returned vector. \\n\n"
9253"/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n"
9254"/// returned vector. \\n\n"
9255"/// Bit [3]: \\n\n"
9256"/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n"
9257"/// returned vector. \\n\n"
9258"/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n"
9259"/// returned vector.\n"
9260"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9261"#define _mm256_permute_pd(A, C) \\\n"
9262" (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))\n"
9263"\n"
9264"/// Copies the values in a 128-bit vector of [4 x float] as specified by\n"
9265"/// the immediate integer operand.\n"
9266"///\n"
9267"/// \\headerfile <x86intrin.h>\n"
9268"///\n"
9269"/// \\code\n"
9270"/// __m128 _mm_permute_ps(__m128 A, const int C);\n"
9271"/// \\endcode\n"
9272"///\n"
9273"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9274"///\n"
9275"/// \\param A\n"
9276"/// A 128-bit vector of [4 x float].\n"
9277"/// \\param C\n"
9278"/// An immediate integer operand specifying how the values are to be\n"
9279"/// copied. \\n\n"
9280"/// Bits [1:0]: \\n\n"
9281"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9282"/// returned vector. \\n\n"
9283"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9284"/// returned vector. \\n\n"
9285"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9286"/// returned vector. \\n\n"
9287"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9288"/// returned vector. \\n\n"
9289"/// Bits [3:2]: \\n\n"
9290"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9291"/// returned vector. \\n\n"
9292"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9293"/// returned vector. \\n\n"
9294"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9295"/// returned vector. \\n\n"
9296"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9297"/// returned vector. \\n\n"
9298"/// Bits [5:4]: \\n\n"
9299"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9300"/// returned vector. \\n\n"
9301"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9302"/// returned vector. \\n\n"
9303"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9304"/// returned vector. \\n\n"
9305"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9306"/// returned vector. \\n\n"
9307"/// Bits [7:6]: \\n\n"
9308"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9309"/// returned vector. \\n\n"
9310"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9311"/// returned vector. \\n\n"
9312"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9313"/// returned vector. \\n\n"
9314"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9315"/// returned vector.\n"
9316"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
9317"#define _mm_permute_ps(A, C) \\\n"
9318" (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))\n"
9319"\n"
9320"/// Copies the values in a 256-bit vector of [8 x float] as specified by\n"
9321"/// the immediate integer operand.\n"
9322"///\n"
9323"/// \\headerfile <x86intrin.h>\n"
9324"///\n"
9325"/// \\code\n"
9326"/// __m256 _mm256_permute_ps(__m256 A, const int C);\n"
9327"/// \\endcode\n"
9328"///\n"
9329"/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n"
9330"///\n"
9331"/// \\param A\n"
9332"/// A 256-bit vector of [8 x float].\n"
9333"/// \\param C\n"
9334"/// An immediate integer operand specifying how the values are to be\n"
9335"/// copied. \\n\n"
9336"/// Bits [1:0]: \\n\n"
9337"/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n"
9338"/// returned vector. \\n\n"
9339"/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n"
9340"/// returned vector. \\n\n"
9341"/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n"
9342"/// returned vector. \\n\n"
9343"/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n"
9344"/// returned vector. \\n\n"
9345"/// Bits [3:2]: \\n\n"
9346"/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n"
9347"/// returned vector. \\n\n"
9348"/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n"
9349"/// returned vector. \\n\n"
9350"/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n"
9351"/// returned vector. \\n\n"
9352"/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n"
9353"/// returned vector. \\n\n"
9354"/// Bits [5:4]: \\n\n"
9355"/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n"
9356"/// returned vector. \\n\n"
9357"/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n"
9358"/// returned vector. \\n\n"
9359"/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n"
9360"/// returned vector. \\n\n"
9361"/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n"
9362"/// returned vector. \\n\n"
9363"/// Bits [7:6]: \\n\n"
9364"/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n"
9365"/// returned vector. \\n\n"
9366"/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n"
9367"/// returned vector. \\n\n"
9368"/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n"
9369"/// returned vector. \\n\n"
9370"/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n"
9371"/// returned vector. \\n\n"
9372"/// Bits [1:0]: \\n\n"
9373"/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n"
9374"/// returned vector. \\n\n"
9375"/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n"
9376"/// returned vector. \\n\n"
9377"/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n"
9378"/// returned vector. \\n\n"
9379"/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n"
9380"/// returned vector. \\n\n"
9381"/// Bits [3:2]: \\n\n"
9382"/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n"
9383"/// returned vector. \\n\n"
9384"/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n"
9385"/// returned vector. \\n\n"
9386"/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n"
9387"/// returned vector. \\n\n"
9388"/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n"
9389"/// returned vector. \\n\n"
9390"/// Bits [5:4]: \\n\n"
9391"/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n"
9392"/// returned vector. \\n\n"
9393"/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n"
9394"/// returned vector. \\n\n"
9395"/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n"
9396"/// returned vector. \\n\n"
9397"/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n"
9398"/// returned vector. \\n\n"
9399"/// Bits [7:6]: \\n\n"
9400"/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n"
9401"/// returned vector. \\n\n"
9402"/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n"
9403"/// returned vector. \\n\n"
9404"/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n"
9405"/// returned vector. \\n\n"
9406"/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n"
9407"/// returned vector.\n"
9408"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9409"#define _mm256_permute_ps(A, C) \\\n"
9410" (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))\n"
9411"\n"
9412"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9413"/// [4 x double], as specified by the immediate integer operand.\n"
9414"///\n"
9415"/// \\headerfile <x86intrin.h>\n"
9416"///\n"
9417"/// \\code\n"
9418"/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);\n"
9419"/// \\endcode\n"
9420"///\n"
9421"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9422"///\n"
9423"/// \\param V1\n"
9424"/// A 256-bit vector of [4 x double].\n"
9425"/// \\param V2\n"
9426"/// A 256-bit vector of [4 x double.\n"
9427"/// \\param M\n"
9428"/// An immediate integer operand specifying how the values are to be\n"
9429"/// permuted. \\n\n"
9430"/// Bits [1:0]: \\n\n"
9431"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9432"/// destination. \\n\n"
9433"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9434"/// destination. \\n\n"
9435"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9436"/// destination. \\n\n"
9437"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9438"/// destination. \\n\n"
9439"/// Bits [5:4]: \\n\n"
9440"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9441"/// destination. \\n\n"
9442"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9443"/// destination. \\n\n"
9444"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9445"/// destination. \\n\n"
9446"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9447"/// destination.\n"
9448"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9449"#define _mm256_permute2f128_pd(V1, V2, M) \\\n"
9450" (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \\\n"
9451" (__v4df)(__m256d)(V2), (int)(M))\n"
9452"\n"
9453"/// Permutes 128-bit data values stored in two 256-bit vectors of\n"
9454"/// [8 x float], as specified by the immediate integer operand.\n"
9455"///\n"
9456"/// \\headerfile <x86intrin.h>\n"
9457"///\n"
9458"/// \\code\n"
9459"/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);\n"
9460"/// \\endcode\n"
9461"///\n"
9462"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9463"///\n"
9464"/// \\param V1\n"
9465"/// A 256-bit vector of [8 x float].\n"
9466"/// \\param V2\n"
9467"/// A 256-bit vector of [8 x float].\n"
9468"/// \\param M\n"
9469"/// An immediate integer operand specifying how the values are to be\n"
9470"/// permuted. \\n\n"
9471"/// Bits [1:0]: \\n\n"
9472"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9473"/// destination. \\n\n"
9474"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9475"/// destination. \\n\n"
9476"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9477"/// destination. \\n\n"
9478"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9479"/// destination. \\n\n"
9480"/// Bits [5:4]: \\n\n"
9481"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9482"/// destination. \\n\n"
9483"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9484"/// destination. \\n\n"
9485"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9486"/// destination. \\n\n"
9487"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9488"/// destination.\n"
9489"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9490"#define _mm256_permute2f128_ps(V1, V2, M) \\\n"
9491" (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \\\n"
9492" (__v8sf)(__m256)(V2), (int)(M))\n"
9493"\n"
9494"/// Permutes 128-bit data values stored in two 256-bit integer vectors,\n"
9495"/// as specified by the immediate integer operand.\n"
9496"///\n"
9497"/// \\headerfile <x86intrin.h>\n"
9498"///\n"
9499"/// \\code\n"
9500"/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);\n"
9501"/// \\endcode\n"
9502"///\n"
9503"/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n"
9504"///\n"
9505"/// \\param V1\n"
9506"/// A 256-bit integer vector.\n"
9507"/// \\param V2\n"
9508"/// A 256-bit integer vector.\n"
9509"/// \\param M\n"
9510"/// An immediate integer operand specifying how the values are to be copied.\n"
9511"/// Bits [1:0]: \\n\n"
9512"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n"
9513"/// destination. \\n\n"
9514"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n"
9515"/// destination. \\n\n"
9516"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n"
9517"/// destination. \\n\n"
9518"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n"
9519"/// destination. \\n\n"
9520"/// Bits [5:4]: \\n\n"
9521"/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n"
9522"/// destination. \\n\n"
9523"/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n"
9524"/// destination. \\n\n"
9525"/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n"
9526"/// destination. \\n\n"
9527"/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n"
9528"/// destination.\n"
9529"/// \\returns A 256-bit integer vector containing the copied values.\n"
9530"#define _mm256_permute2f128_si256(V1, V2, M) \\\n"
9531" (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \\\n"
9532" (__v8si)(__m256i)(V2), (int)(M))\n"
9533"\n"
9534"/* Vector Blend */\n"
9535"/// Merges 64-bit double-precision data values stored in either of the\n"
9536"/// two 256-bit vectors of [4 x double], as specified by the immediate\n"
9537"/// integer operand.\n"
9538"///\n"
9539"/// \\headerfile <x86intrin.h>\n"
9540"///\n"
9541"/// \\code\n"
9542"/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);\n"
9543"/// \\endcode\n"
9544"///\n"
9545"/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.\n"
9546"///\n"
9547"/// \\param V1\n"
9548"/// A 256-bit vector of [4 x double].\n"
9549"/// \\param V2\n"
9550"/// A 256-bit vector of [4 x double].\n"
9551"/// \\param M\n"
9552"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
9553"/// values are to be copied. The position of the mask bit corresponds to the\n"
9554"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
9555"/// element in operand \\a V1 is copied to the same position in the\n"
9556"/// destination. When a mask bit is 1, the corresponding 64-bit element in\n"
9557"/// operand \\a V2 is copied to the same position in the destination.\n"
9558"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9559"#define _mm256_blend_pd(V1, V2, M) \\\n"
9560" (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \\\n"
9561" (__v4df)(__m256d)(V2), (int)(M))\n"
9562"\n"
9563"/// Merges 32-bit single-precision data values stored in either of the\n"
9564"/// two 256-bit vectors of [8 x float], as specified by the immediate\n"
9565"/// integer operand.\n"
9566"///\n"
9567"/// \\headerfile <x86intrin.h>\n"
9568"///\n"
9569"/// \\code\n"
9570"/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);\n"
9571"/// \\endcode\n"
9572"///\n"
9573"/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.\n"
9574"///\n"
9575"/// \\param V1\n"
9576"/// A 256-bit vector of [8 x float].\n"
9577"/// \\param V2\n"
9578"/// A 256-bit vector of [8 x float].\n"
9579"/// \\param M\n"
9580"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
9581"/// values are to be copied. The position of the mask bit corresponds to the\n"
9582"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
9583"/// element in operand \\a V1 is copied to the same position in the\n"
9584"/// destination. When a mask bit is 1, the corresponding 32-bit element in\n"
9585"/// operand \\a V2 is copied to the same position in the destination.\n"
9586"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9587"#define _mm256_blend_ps(V1, V2, M) \\\n"
9588" (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \\\n"
9589" (__v8sf)(__m256)(V2), (int)(M))\n"
9590"\n"
9591"/// Merges 64-bit double-precision data values stored in either of the\n"
9592"/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector\n"
9593"/// operand.\n"
9594"///\n"
9595"/// \\headerfile <x86intrin.h>\n"
9596"///\n"
9597"/// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.\n"
9598"///\n"
9599"/// \\param __a\n"
9600"/// A 256-bit vector of [4 x double].\n"
9601"/// \\param __b\n"
9602"/// A 256-bit vector of [4 x double].\n"
9603"/// \\param __c\n"
9604"/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying\n"
9605"/// how the values are to be copied. The position of the mask bit corresponds\n"
9606"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
9607"/// corresponding 64-bit element in operand \\a __a is copied to the same\n"
9608"/// position in the destination. When a mask bit is 1, the corresponding\n"
9609"/// 64-bit element in operand \\a __b is copied to the same position in the\n"
9610"/// destination.\n"
9611"/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n"
9612"static __inline __m256d __DEFAULT_FN_ATTRS\n"
9613"_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)\n"
9614"{\n"
9615" return (__m256d)__builtin_ia32_blendvpd256(\n"
9616" (__v4df)__a, (__v4df)__b, (__v4df)__c);\n"
9617"}\n"
9618"\n"
9619"/// Merges 32-bit single-precision data values stored in either of the\n"
9620"/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector\n"
9621"/// operand.\n"
9622"///\n"
9623"/// \\headerfile <x86intrin.h>\n"
9624"///\n"
9625"/// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.\n"
9626"///\n"
9627"/// \\param __a\n"
9628"/// A 256-bit vector of [8 x float].\n"
9629"/// \\param __b\n"
9630"/// A 256-bit vector of [8 x float].\n"
9631"/// \\param __c\n"
9632"/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,\n"
9633"/// and 31 specifying how the values are to be copied. The position of the\n"
9634"/// mask bit corresponds to the most significant bit of a copied value. When\n"
9635"/// a mask bit is 0, the corresponding 32-bit element in operand \\a __a is\n"
9636"/// copied to the same position in the destination. When a mask bit is 1, the\n"
9637"/// corresponding 32-bit element in operand \\a __b is copied to the same\n"
9638"/// position in the destination.\n"
9639"/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n"
9640"static __inline __m256 __DEFAULT_FN_ATTRS\n"
9641"_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)\n"
9642"{\n"
9643" return (__m256)__builtin_ia32_blendvps256(\n"
9644" (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);\n"
9645"}\n"
9646"\n"
9647"/* Vector Dot Product */\n"
9648"/// Computes two dot products in parallel, using the lower and upper\n"
9649"/// halves of two [8 x float] vectors as input to the two computations, and\n"
9650"/// returning the two dot products in the lower and upper halves of the\n"
9651"/// [8 x float] result.\n"
9652"///\n"
9653"/// The immediate integer operand controls which input elements will\n"
9654"/// contribute to the dot product, and where the final results are returned.\n"
9655"/// In general, for each dot product, the four corresponding elements of the\n"
9656"/// input vectors are multiplied; the first two and second two products are\n"
9657"/// summed, then the two sums are added to form the final result.\n"
9658"///\n"
9659"/// \\headerfile <x86intrin.h>\n"
9660"///\n"
9661"/// \\code\n"
9662"/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);\n"
9663"/// \\endcode\n"
9664"///\n"
9665"/// This intrinsic corresponds to the <c> VDPPS </c> instruction.\n"
9666"///\n"
9667"/// \\param V1\n"
9668"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9669"/// \\param V2\n"
9670"/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n"
9671"/// \\param M\n"
9672"/// An immediate integer argument. Bits [7:4] determine which elements of\n"
9673"/// the input vectors are used, with bit [4] corresponding to the lowest\n"
9674"/// element and bit [7] corresponding to the highest element of each [4 x\n"
9675"/// float] subvector. If a bit is set, the corresponding elements from the\n"
9676"/// two input vectors are used as an input for dot product; otherwise that\n"
9677"/// input is treated as zero. Bits [3:0] determine which elements of the\n"
9678"/// result will receive a copy of the final dot product, with bit [0]\n"
9679"/// corresponding to the lowest element and bit [3] corresponding to the\n"
9680"/// highest element of each [4 x float] subvector. If a bit is set, the dot\n"
9681"/// product is returned in the corresponding element; otherwise that element\n"
9682"/// is set to zero. The bitmask is applied in the same way to each of the\n"
9683"/// two parallel dot product computations.\n"
9684"/// \\returns A 256-bit vector of [8 x float] containing the two dot products.\n"
9685"#define _mm256_dp_ps(V1, V2, M) \\\n"
9686" (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \\\n"
9687" (__v8sf)(__m256)(V2), (M))\n"
9688"\n"
9689"/* Vector shuffle */\n"
9690"/// Selects 8 float values from the 256-bit operands of [8 x float], as\n"
9691"/// specified by the immediate value operand.\n"
9692"///\n"
9693"/// The four selected elements in each operand are copied to the destination\n"
9694"/// according to the bits specified in the immediate operand. The selected\n"
9695"/// elements from the first 256-bit operand are copied to bits [63:0] and\n"
9696"/// bits [191:128] of the destination, and the selected elements from the\n"
9697"/// second 256-bit operand are copied to bits [127:64] and bits [255:192] of\n"
9698"/// the destination. For example, if bits [7:0] of the immediate operand\n"
9699"/// contain a value of 0xFF, the 256-bit destination vector would contain the\n"
9700"/// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].\n"
9701"///\n"
9702"/// \\headerfile <x86intrin.h>\n"
9703"///\n"
9704"/// \\code\n"
9705"/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);\n"
9706"/// \\endcode\n"
9707"///\n"
9708"/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.\n"
9709"///\n"
9710"/// \\param a\n"
9711"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9712"/// operand are copied to bits [63:0] and bits [191:128] in the destination,\n"
9713"/// according to the bits specified in the immediate operand.\n"
9714"/// \\param b\n"
9715"/// A 256-bit vector of [8 x float]. The four selected elements in this\n"
9716"/// operand are copied to bits [127:64] and bits [255:192] in the\n"
9717"/// destination, according to the bits specified in the immediate operand.\n"
9718"/// \\param mask\n"
9719"/// An immediate value containing an 8-bit value specifying which elements to\n"
9720"/// copy from \\a a and \\a b \\n.\n"
9721"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
9722"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
9723"/// The destinations within the 256-bit destination are assigned values as\n"
9724"/// follows, according to the bit value assignments described below: \\n\n"
9725"/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the\n"
9726"/// destination. \\n\n"
9727"/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the\n"
9728"/// destination. \\n\n"
9729"/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the\n"
9730"/// destination. \\n\n"
9731"/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in\n"
9732"/// the destination. \\n\n"
9733"/// Bit value assignments: \\n\n"
9734"/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \\n\n"
9735"/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \\n\n"
9736"/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \\n\n"
9737"/// 11: Bits [127:96] and [255:224] are copied from the selected operand.\n"
9738"/// \\returns A 256-bit vector of [8 x float] containing the shuffled values.\n"
9739"#define _mm256_shuffle_ps(a, b, mask) \\\n"
9740" (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \\\n"
9741" (__v8sf)(__m256)(b), (int)(mask))\n"
9742"\n"
9743"/// Selects four double-precision values from the 256-bit operands of\n"
9744"/// [4 x double], as specified by the immediate value operand.\n"
9745"///\n"
9746"/// The selected elements from the first 256-bit operand are copied to bits\n"
9747"/// [63:0] and bits [191:128] in the destination, and the selected elements\n"
9748"/// from the second 256-bit operand are copied to bits [127:64] and bits\n"
9749"/// [255:192] in the destination. For example, if bits [3:0] of the immediate\n"
9750"/// operand contain a value of 0xF, the 256-bit destination vector would\n"
9751"/// contain the following values: b[3], a[3], b[1], a[1].\n"
9752"///\n"
9753"/// \\headerfile <x86intrin.h>\n"
9754"///\n"
9755"/// \\code\n"
9756"/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);\n"
9757"/// \\endcode\n"
9758"///\n"
9759"/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.\n"
9760"///\n"
9761"/// \\param a\n"
9762"/// A 256-bit vector of [4 x double].\n"
9763"/// \\param b\n"
9764"/// A 256-bit vector of [4 x double].\n"
9765"/// \\param mask\n"
9766"/// An immediate value containing 8-bit values specifying which elements to\n"
9767"/// copy from \\a a and \\a b: \\n\n"
9768"/// Bit [0]=0: Bits [63:0] are copied from \\a a to bits [63:0] of the\n"
9769"/// destination. \\n\n"
9770"/// Bit [0]=1: Bits [127:64] are copied from \\a a to bits [63:0] of the\n"
9771"/// destination. \\n\n"
9772"/// Bit [1]=0: Bits [63:0] are copied from \\a b to bits [127:64] of the\n"
9773"/// destination. \\n\n"
9774"/// Bit [1]=1: Bits [127:64] are copied from \\a b to bits [127:64] of the\n"
9775"/// destination. \\n\n"
9776"/// Bit [2]=0: Bits [191:128] are copied from \\a a to bits [191:128] of the\n"
9777"/// destination. \\n\n"
9778"/// Bit [2]=1: Bits [255:192] are copied from \\a a to bits [191:128] of the\n"
9779"/// destination. \\n\n"
9780"/// Bit [3]=0: Bits [191:128] are copied from \\a b to bits [255:192] of the\n"
9781"/// destination. \\n\n"
9782"/// Bit [3]=1: Bits [255:192] are copied from \\a b to bits [255:192] of the\n"
9783"/// destination.\n"
9784"/// \\returns A 256-bit vector of [4 x double] containing the shuffled values.\n"
9785"#define _mm256_shuffle_pd(a, b, mask) \\\n"
9786" (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \\\n"
9787" (__v4df)(__m256d)(b), (int)(mask))\n"
9788"\n"
9789"/* Compare */\n"
9790"#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */\n"
9791"#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */\n"
9792"#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */\n"
9793"#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */\n"
9794"#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */\n"
9795"#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */\n"
9796"#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */\n"
9797"#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */\n"
9798"#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */\n"
9799"#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */\n"
9800"#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */\n"
9801"#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */\n"
9802"#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */\n"
9803"#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */\n"
9804"#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */\n"
9805"#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */\n"
9806"#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */\n"
9807"#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */\n"
9808"#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */\n"
9809"#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */\n"
9810"#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */\n"
9811"#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */\n"
9812"#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */\n"
9813"#define _CMP_ORD_S 0x17 /* Ordered (signaling) */\n"
9814"#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */\n"
9815"#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */\n"
9816"#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */\n"
9817"#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */\n"
9818"#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */\n"
9819"#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */\n"
9820"#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */\n"
9821"#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */\n"
9822"\n"
9823"/// Compares each of the corresponding double-precision values of two\n"
9824"/// 128-bit vectors of [2 x double], using the operation specified by the\n"
9825"/// immediate integer operand.\n"
9826"///\n"
9827"/// Returns a [2 x double] vector consisting of two doubles corresponding to\n"
9828"/// the two comparison results: zero if the comparison is false, and all 1's\n"
9829"/// if the comparison is true.\n"
9830"///\n"
9831"/// \\headerfile <x86intrin.h>\n"
9832"///\n"
9833"/// \\code\n"
9834"/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);\n"
9835"/// \\endcode\n"
9836"///\n"
9837"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9838"///\n"
9839"/// \\param a\n"
9840"/// A 128-bit vector of [2 x double].\n"
9841"/// \\param b\n"
9842"/// A 128-bit vector of [2 x double].\n"
9843"/// \\param c\n"
9844"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9845"/// operation to use: \\n\n"
9846"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9847"/// 0x01: Less-than (ordered, signaling) \\n\n"
9848"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9849"/// 0x03: Unordered (non-signaling) \\n\n"
9850"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9851"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9852"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9853"/// 0x07: Ordered (non-signaling) \\n\n"
9854"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9855"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9856"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9857"/// 0x0B: False (ordered, non-signaling) \\n\n"
9858"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9859"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9860"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9861"/// 0x0F: True (unordered, non-signaling) \\n\n"
9862"/// 0x10: Equal (ordered, signaling) \\n\n"
9863"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9864"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9865"/// 0x13: Unordered (signaling) \\n\n"
9866"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9867"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9868"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9869"/// 0x17: Ordered (signaling) \\n\n"
9870"/// 0x18: Equal (unordered, signaling) \\n\n"
9871"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9872"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9873"/// 0x1B: False (ordered, signaling) \\n\n"
9874"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9875"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9876"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9877"/// 0x1F: True (unordered, signaling)\n"
9878"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
9879"#define _mm_cmp_pd(a, b, c) \\\n"
9880" (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \\\n"
9881" (__v2df)(__m128d)(b), (c))\n"
9882"\n"
9883"/// Compares each of the corresponding values of two 128-bit vectors of\n"
9884"/// [4 x float], using the operation specified by the immediate integer\n"
9885"/// operand.\n"
9886"///\n"
9887"/// Returns a [4 x float] vector consisting of four floats corresponding to\n"
9888"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9889"/// if the comparison is true.\n"
9890"///\n"
9891"/// \\headerfile <x86intrin.h>\n"
9892"///\n"
9893"/// \\code\n"
9894"/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);\n"
9895"/// \\endcode\n"
9896"///\n"
9897"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
9898"///\n"
9899"/// \\param a\n"
9900"/// A 128-bit vector of [4 x float].\n"
9901"/// \\param b\n"
9902"/// A 128-bit vector of [4 x float].\n"
9903"/// \\param c\n"
9904"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9905"/// operation to use: \\n\n"
9906"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9907"/// 0x01: Less-than (ordered, signaling) \\n\n"
9908"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9909"/// 0x03: Unordered (non-signaling) \\n\n"
9910"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9911"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9912"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9913"/// 0x07: Ordered (non-signaling) \\n\n"
9914"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9915"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9916"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9917"/// 0x0B: False (ordered, non-signaling) \\n\n"
9918"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9919"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9920"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9921"/// 0x0F: True (unordered, non-signaling) \\n\n"
9922"/// 0x10: Equal (ordered, signaling) \\n\n"
9923"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9924"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9925"/// 0x13: Unordered (signaling) \\n\n"
9926"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9927"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9928"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9929"/// 0x17: Ordered (signaling) \\n\n"
9930"/// 0x18: Equal (unordered, signaling) \\n\n"
9931"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9932"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9933"/// 0x1B: False (ordered, signaling) \\n\n"
9934"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9935"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9936"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9937"/// 0x1F: True (unordered, signaling)\n"
9938"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
9939"#define _mm_cmp_ps(a, b, c) \\\n"
9940" (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \\\n"
9941" (__v4sf)(__m128)(b), (c))\n"
9942"\n"
9943"/// Compares each of the corresponding double-precision values of two\n"
9944"/// 256-bit vectors of [4 x double], using the operation specified by the\n"
9945"/// immediate integer operand.\n"
9946"///\n"
9947"/// Returns a [4 x double] vector consisting of four doubles corresponding to\n"
9948"/// the four comparison results: zero if the comparison is false, and all 1's\n"
9949"/// if the comparison is true.\n"
9950"///\n"
9951"/// \\headerfile <x86intrin.h>\n"
9952"///\n"
9953"/// \\code\n"
9954"/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);\n"
9955"/// \\endcode\n"
9956"///\n"
9957"/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n"
9958"///\n"
9959"/// \\param a\n"
9960"/// A 256-bit vector of [4 x double].\n"
9961"/// \\param b\n"
9962"/// A 256-bit vector of [4 x double].\n"
9963"/// \\param c\n"
9964"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
9965"/// operation to use: \\n\n"
9966"/// 0x00: Equal (ordered, non-signaling) \\n\n"
9967"/// 0x01: Less-than (ordered, signaling) \\n\n"
9968"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
9969"/// 0x03: Unordered (non-signaling) \\n\n"
9970"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
9971"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
9972"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
9973"/// 0x07: Ordered (non-signaling) \\n\n"
9974"/// 0x08: Equal (unordered, non-signaling) \\n\n"
9975"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
9976"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
9977"/// 0x0B: False (ordered, non-signaling) \\n\n"
9978"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
9979"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
9980"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
9981"/// 0x0F: True (unordered, non-signaling) \\n\n"
9982"/// 0x10: Equal (ordered, signaling) \\n\n"
9983"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
9984"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
9985"/// 0x13: Unordered (signaling) \\n\n"
9986"/// 0x14: Not-equal (unordered, signaling) \\n\n"
9987"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
9988"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
9989"/// 0x17: Ordered (signaling) \\n\n"
9990"/// 0x18: Equal (unordered, signaling) \\n\n"
9991"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
9992"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
9993"/// 0x1B: False (ordered, signaling) \\n\n"
9994"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
9995"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
9996"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
9997"/// 0x1F: True (unordered, signaling)\n"
9998"/// \\returns A 256-bit vector of [4 x double] containing the comparison results.\n"
9999"#define _mm256_cmp_pd(a, b, c) \\\n"
10000" (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \\\n"
10001" (__v4df)(__m256d)(b), (c))\n"
10002"\n"
10003"/// Compares each of the corresponding values of two 256-bit vectors of\n"
10004"/// [8 x float], using the operation specified by the immediate integer\n"
10005"/// operand.\n"
10006"///\n"
10007"/// Returns a [8 x float] vector consisting of eight floats corresponding to\n"
10008"/// the eight comparison results: zero if the comparison is false, and all\n"
10009"/// 1's if the comparison is true.\n"
10010"///\n"
10011"/// \\headerfile <x86intrin.h>\n"
10012"///\n"
10013"/// \\code\n"
10014"/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);\n"
10015"/// \\endcode\n"
10016"///\n"
10017"/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n"
10018"///\n"
10019"/// \\param a\n"
10020"/// A 256-bit vector of [8 x float].\n"
10021"/// \\param b\n"
10022"/// A 256-bit vector of [8 x float].\n"
10023"/// \\param c\n"
10024"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10025"/// operation to use: \\n\n"
10026"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10027"/// 0x01: Less-than (ordered, signaling) \\n\n"
10028"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10029"/// 0x03: Unordered (non-signaling) \\n\n"
10030"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10031"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10032"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10033"/// 0x07: Ordered (non-signaling) \\n\n"
10034"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10035"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10036"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10037"/// 0x0B: False (ordered, non-signaling) \\n\n"
10038"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10039"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10040"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10041"/// 0x0F: True (unordered, non-signaling) \\n\n"
10042"/// 0x10: Equal (ordered, signaling) \\n\n"
10043"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10044"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10045"/// 0x13: Unordered (signaling) \\n\n"
10046"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10047"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10048"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10049"/// 0x17: Ordered (signaling) \\n\n"
10050"/// 0x18: Equal (unordered, signaling) \\n\n"
10051"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10052"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10053"/// 0x1B: False (ordered, signaling) \\n\n"
10054"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10055"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10056"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10057"/// 0x1F: True (unordered, signaling)\n"
10058"/// \\returns A 256-bit vector of [8 x float] containing the comparison results.\n"
10059"#define _mm256_cmp_ps(a, b, c) \\\n"
10060" (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \\\n"
10061" (__v8sf)(__m256)(b), (c))\n"
10062"\n"
10063"/// Compares each of the corresponding scalar double-precision values of\n"
10064"/// two 128-bit vectors of [2 x double], using the operation specified by the\n"
10065"/// immediate integer operand.\n"
10066"///\n"
10067"/// If the result is true, all 64 bits of the destination vector are set;\n"
10068"/// otherwise they are cleared.\n"
10069"///\n"
10070"/// \\headerfile <x86intrin.h>\n"
10071"///\n"
10072"/// \\code\n"
10073"/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);\n"
10074"/// \\endcode\n"
10075"///\n"
10076"/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.\n"
10077"///\n"
10078"/// \\param a\n"
10079"/// A 128-bit vector of [2 x double].\n"
10080"/// \\param b\n"
10081"/// A 128-bit vector of [2 x double].\n"
10082"/// \\param c\n"
10083"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10084"/// operation to use: \\n\n"
10085"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10086"/// 0x01: Less-than (ordered, signaling) \\n\n"
10087"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10088"/// 0x03: Unordered (non-signaling) \\n\n"
10089"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10090"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10091"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10092"/// 0x07: Ordered (non-signaling) \\n\n"
10093"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10094"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10095"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10096"/// 0x0B: False (ordered, non-signaling) \\n\n"
10097"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10098"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10099"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10100"/// 0x0F: True (unordered, non-signaling) \\n\n"
10101"/// 0x10: Equal (ordered, signaling) \\n\n"
10102"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10103"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10104"/// 0x13: Unordered (signaling) \\n\n"
10105"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10106"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10107"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10108"/// 0x17: Ordered (signaling) \\n\n"
10109"/// 0x18: Equal (unordered, signaling) \\n\n"
10110"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10111"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10112"/// 0x1B: False (ordered, signaling) \\n\n"
10113"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10114"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10115"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10116"/// 0x1F: True (unordered, signaling)\n"
10117"/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n"
10118"#define _mm_cmp_sd(a, b, c) \\\n"
10119" (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \\\n"
10120" (__v2df)(__m128d)(b), (c))\n"
10121"\n"
10122"/// Compares each of the corresponding scalar values of two 128-bit\n"
10123"/// vectors of [4 x float], using the operation specified by the immediate\n"
10124"/// integer operand.\n"
10125"///\n"
10126"/// If the result is true, all 32 bits of the destination vector are set;\n"
10127"/// otherwise they are cleared.\n"
10128"///\n"
10129"/// \\headerfile <x86intrin.h>\n"
10130"///\n"
10131"/// \\code\n"
10132"/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);\n"
10133"/// \\endcode\n"
10134"///\n"
10135"/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.\n"
10136"///\n"
10137"/// \\param a\n"
10138"/// A 128-bit vector of [4 x float].\n"
10139"/// \\param b\n"
10140"/// A 128-bit vector of [4 x float].\n"
10141"/// \\param c\n"
10142"/// An immediate integer operand, with bits [4:0] specifying which comparison\n"
10143"/// operation to use: \\n\n"
10144"/// 0x00: Equal (ordered, non-signaling) \\n\n"
10145"/// 0x01: Less-than (ordered, signaling) \\n\n"
10146"/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n"
10147"/// 0x03: Unordered (non-signaling) \\n\n"
10148"/// 0x04: Not-equal (unordered, non-signaling) \\n\n"
10149"/// 0x05: Not-less-than (unordered, signaling) \\n\n"
10150"/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n"
10151"/// 0x07: Ordered (non-signaling) \\n\n"
10152"/// 0x08: Equal (unordered, non-signaling) \\n\n"
10153"/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n"
10154"/// 0x0A: Not-greater-than (unordered, signaling) \\n\n"
10155"/// 0x0B: False (ordered, non-signaling) \\n\n"
10156"/// 0x0C: Not-equal (ordered, non-signaling) \\n\n"
10157"/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n"
10158"/// 0x0E: Greater-than (ordered, signaling) \\n\n"
10159"/// 0x0F: True (unordered, non-signaling) \\n\n"
10160"/// 0x10: Equal (ordered, signaling) \\n\n"
10161"/// 0x11: Less-than (ordered, non-signaling) \\n\n"
10162"/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n"
10163"/// 0x13: Unordered (signaling) \\n\n"
10164"/// 0x14: Not-equal (unordered, signaling) \\n\n"
10165"/// 0x15: Not-less-than (unordered, non-signaling) \\n\n"
10166"/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n"
10167"/// 0x17: Ordered (signaling) \\n\n"
10168"/// 0x18: Equal (unordered, signaling) \\n\n"
10169"/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n"
10170"/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n"
10171"/// 0x1B: False (ordered, signaling) \\n\n"
10172"/// 0x1C: Not-equal (ordered, signaling) \\n\n"
10173"/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n"
10174"/// 0x1E: Greater-than (ordered, non-signaling) \\n\n"
10175"/// 0x1F: True (unordered, signaling)\n"
10176"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
10177"#define _mm_cmp_ss(a, b, c) \\\n"
10178" (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \\\n"
10179" (__v4sf)(__m128)(b), (c))\n"
10180"\n"
10181"/// Takes a [8 x i32] vector and returns the vector element value\n"
10182"/// indexed by the immediate constant operand.\n"
10183"///\n"
10184"/// \\headerfile <x86intrin.h>\n"
10185"///\n"
10186"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10187"/// instruction.\n"
10188"///\n"
10189"/// \\param __a\n"
10190"/// A 256-bit vector of [8 x i32].\n"
10191"/// \\param __imm\n"
10192"/// An immediate integer operand with bits [2:0] determining which vector\n"
10193"/// element is extracted and returned.\n"
10194"/// \\returns A 32-bit integer containing the extracted 32 bits of extended\n"
10195"/// packed data.\n"
10196"#define _mm256_extract_epi32(X, N) \\\n"
10197" (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))\n"
10198"\n"
10199"/// Takes a [16 x i16] vector and returns the vector element value\n"
10200"/// indexed by the immediate constant operand.\n"
10201"///\n"
10202"/// \\headerfile <x86intrin.h>\n"
10203"///\n"
10204"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10205"/// instruction.\n"
10206"///\n"
10207"/// \\param __a\n"
10208"/// A 256-bit integer vector of [16 x i16].\n"
10209"/// \\param __imm\n"
10210"/// An immediate integer operand with bits [3:0] determining which vector\n"
10211"/// element is extracted and returned.\n"
10212"/// \\returns A 32-bit integer containing the extracted 16 bits of zero extended\n"
10213"/// packed data.\n"
10214"#define _mm256_extract_epi16(X, N) \\\n"
10215" (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \\\n"
10216" (int)(N))\n"
10217"\n"
10218"/// Takes a [32 x i8] vector and returns the vector element value\n"
10219"/// indexed by the immediate constant operand.\n"
10220"///\n"
10221"/// \\headerfile <x86intrin.h>\n"
10222"///\n"
10223"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10224"/// instruction.\n"
10225"///\n"
10226"/// \\param __a\n"
10227"/// A 256-bit integer vector of [32 x i8].\n"
10228"/// \\param __imm\n"
10229"/// An immediate integer operand with bits [4:0] determining which vector\n"
10230"/// element is extracted and returned.\n"
10231"/// \\returns A 32-bit integer containing the extracted 8 bits of zero extended\n"
10232"/// packed data.\n"
10233"#define _mm256_extract_epi8(X, N) \\\n"
10234" (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \\\n"
10235" (int)(N))\n"
10236"\n"
10237"#ifdef __x86_64__\n"
10238"/// Takes a [4 x i64] vector and returns the vector element value\n"
10239"/// indexed by the immediate constant operand.\n"
10240"///\n"
10241"/// \\headerfile <x86intrin.h>\n"
10242"///\n"
10243"/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n"
10244"/// instruction.\n"
10245"///\n"
10246"/// \\param __a\n"
10247"/// A 256-bit integer vector of [4 x i64].\n"
10248"/// \\param __imm\n"
10249"/// An immediate integer operand with bits [1:0] determining which vector\n"
10250"/// element is extracted and returned.\n"
10251"/// \\returns A 64-bit integer containing the extracted 64 bits of extended\n"
10252"/// packed data.\n"
10253"#define _mm256_extract_epi64(X, N) \\\n"
10254" (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))\n"
10255"#endif\n"
10256"\n"
10257"/// Takes a [8 x i32] vector and replaces the vector element value\n"
10258"/// indexed by the immediate constant operand by a new value. Returns the\n"
10259"/// modified vector.\n"
10260"///\n"
10261"/// \\headerfile <x86intrin.h>\n"
10262"///\n"
10263"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10264"/// instruction.\n"
10265"///\n"
10266"/// \\param __a\n"
10267"/// A vector of [8 x i32] to be used by the insert operation.\n"
10268"/// \\param __b\n"
10269"/// An integer value. The replacement value for the insert operation.\n"
10270"/// \\param __imm\n"
10271"/// An immediate integer specifying the index of the vector element to be\n"
10272"/// replaced.\n"
10273"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10274"/// \\a __imm with \\a __b.\n"
10275"#define _mm256_insert_epi32(X, I, N) \\\n"
10276" (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \\\n"
10277" (int)(I), (int)(N))\n"
10278"\n"
10279"\n"
10280"/// Takes a [16 x i16] vector and replaces the vector element value\n"
10281"/// indexed by the immediate constant operand with a new value. Returns the\n"
10282"/// modified vector.\n"
10283"///\n"
10284"/// \\headerfile <x86intrin.h>\n"
10285"///\n"
10286"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10287"/// instruction.\n"
10288"///\n"
10289"/// \\param __a\n"
10290"/// A vector of [16 x i16] to be used by the insert operation.\n"
10291"/// \\param __b\n"
10292"/// An i16 integer value. The replacement value for the insert operation.\n"
10293"/// \\param __imm\n"
10294"/// An immediate integer specifying the index of the vector element to be\n"
10295"/// replaced.\n"
10296"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10297"/// \\a __imm with \\a __b.\n"
10298"#define _mm256_insert_epi16(X, I, N) \\\n"
10299" (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \\\n"
10300" (int)(I), (int)(N))\n"
10301"\n"
10302"/// Takes a [32 x i8] vector and replaces the vector element value\n"
10303"/// indexed by the immediate constant operand with a new value. Returns the\n"
10304"/// modified vector.\n"
10305"///\n"
10306"/// \\headerfile <x86intrin.h>\n"
10307"///\n"
10308"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10309"/// instruction.\n"
10310"///\n"
10311"/// \\param __a\n"
10312"/// A vector of [32 x i8] to be used by the insert operation.\n"
10313"/// \\param __b\n"
10314"/// An i8 integer value. The replacement value for the insert operation.\n"
10315"/// \\param __imm\n"
10316"/// An immediate integer specifying the index of the vector element to be\n"
10317"/// replaced.\n"
10318"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10319"/// \\a __imm with \\a __b.\n"
10320"#define _mm256_insert_epi8(X, I, N) \\\n"
10321" (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \\\n"
10322" (int)(I), (int)(N))\n"
10323"\n"
10324"#ifdef __x86_64__\n"
10325"/// Takes a [4 x i64] vector and replaces the vector element value\n"
10326"/// indexed by the immediate constant operand with a new value. Returns the\n"
10327"/// modified vector.\n"
10328"///\n"
10329"/// \\headerfile <x86intrin.h>\n"
10330"///\n"
10331"/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n"
10332"/// instruction.\n"
10333"///\n"
10334"/// \\param __a\n"
10335"/// A vector of [4 x i64] to be used by the insert operation.\n"
10336"/// \\param __b\n"
10337"/// A 64-bit integer value. The replacement value for the insert operation.\n"
10338"/// \\param __imm\n"
10339"/// An immediate integer specifying the index of the vector element to be\n"
10340"/// replaced.\n"
10341"/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n"
10342"/// \\a __imm with \\a __b.\n"
10343"#define _mm256_insert_epi64(X, I, N) \\\n"
10344" (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \\\n"
10345" (long long)(I), (int)(N))\n"
10346"#endif\n"
10347"\n"
10348"/* Conversion */\n"
10349"/// Converts a vector of [4 x i32] into a vector of [4 x double].\n"
10350"///\n"
10351"/// \\headerfile <x86intrin.h>\n"
10352"///\n"
10353"/// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.\n"
10354"///\n"
10355"/// \\param __a\n"
10356"/// A 128-bit integer vector of [4 x i32].\n"
10357"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10358"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10359"_mm256_cvtepi32_pd(__m128i __a)\n"
10360"{\n"
10361" return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);\n"
10362"}\n"
10363"\n"
10364"/// Converts a vector of [8 x i32] into a vector of [8 x float].\n"
10365"///\n"
10366"/// \\headerfile <x86intrin.h>\n"
10367"///\n"
10368"/// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.\n"
10369"///\n"
10370"/// \\param __a\n"
10371"/// A 256-bit integer vector.\n"
10372"/// \\returns A 256-bit vector of [8 x float] containing the converted values.\n"
10373"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10374"_mm256_cvtepi32_ps(__m256i __a)\n"
10375"{\n"
10376" return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);\n"
10377"}\n"
10378"\n"
10379"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of\n"
10380"/// [4 x float].\n"
10381"///\n"
10382"/// \\headerfile <x86intrin.h>\n"
10383"///\n"
10384"/// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.\n"
10385"///\n"
10386"/// \\param __a\n"
10387"/// A 256-bit vector of [4 x double].\n"
10388"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
10389"static __inline __m128 __DEFAULT_FN_ATTRS\n"
10390"_mm256_cvtpd_ps(__m256d __a)\n"
10391"{\n"
10392" return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);\n"
10393"}\n"
10394"\n"
10395"/// Converts a vector of [8 x float] into a vector of [8 x i32].\n"
10396"///\n"
10397"/// \\headerfile <x86intrin.h>\n"
10398"///\n"
10399"/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.\n"
10400"///\n"
10401"/// \\param __a\n"
10402"/// A 256-bit vector of [8 x float].\n"
10403"/// \\returns A 256-bit integer vector containing the converted values.\n"
10404"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10405"_mm256_cvtps_epi32(__m256 __a)\n"
10406"{\n"
10407" return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);\n"
10408"}\n"
10409"\n"
10410"/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4\n"
10411"/// x double].\n"
10412"///\n"
10413"/// \\headerfile <x86intrin.h>\n"
10414"///\n"
10415"/// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.\n"
10416"///\n"
10417"/// \\param __a\n"
10418"/// A 128-bit vector of [4 x float].\n"
10419"/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n"
10420"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10421"_mm256_cvtps_pd(__m128 __a)\n"
10422"{\n"
10423" return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);\n"
10424"}\n"
10425"\n"
10426"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10427"/// x i32], truncating the result by rounding towards zero when it is\n"
10428"/// inexact.\n"
10429"///\n"
10430"/// \\headerfile <x86intrin.h>\n"
10431"///\n"
10432"/// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.\n"
10433"///\n"
10434"/// \\param __a\n"
10435"/// A 256-bit vector of [4 x double].\n"
10436"/// \\returns A 128-bit integer vector containing the converted values.\n"
10437"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10438"_mm256_cvttpd_epi32(__m256d __a)\n"
10439"{\n"
10440" return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);\n"
10441"}\n"
10442"\n"
10443"/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n"
10444"/// x i32]. When a conversion is inexact, the value returned is rounded\n"
10445"/// according to the rounding control bits in the MXCSR register.\n"
10446"///\n"
10447"/// \\headerfile <x86intrin.h>\n"
10448"///\n"
10449"/// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.\n"
10450"///\n"
10451"/// \\param __a\n"
10452"/// A 256-bit vector of [4 x double].\n"
10453"/// \\returns A 128-bit integer vector containing the converted values.\n"
10454"static __inline __m128i __DEFAULT_FN_ATTRS\n"
10455"_mm256_cvtpd_epi32(__m256d __a)\n"
10456"{\n"
10457" return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);\n"
10458"}\n"
10459"\n"
10460"/// Converts a vector of [8 x float] into a vector of [8 x i32],\n"
10461"/// truncating the result by rounding towards zero when it is inexact.\n"
10462"///\n"
10463"/// \\headerfile <x86intrin.h>\n"
10464"///\n"
10465"/// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.\n"
10466"///\n"
10467"/// \\param __a\n"
10468"/// A 256-bit vector of [8 x float].\n"
10469"/// \\returns A 256-bit integer vector containing the converted values.\n"
10470"static __inline __m256i __DEFAULT_FN_ATTRS\n"
10471"_mm256_cvttps_epi32(__m256 __a)\n"
10472"{\n"
10473" return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);\n"
10474"}\n"
10475"\n"
10476"/// Returns the first element of the input vector of [4 x double].\n"
10477"///\n"
10478"/// \\headerfile <avxintrin.h>\n"
10479"///\n"
10480"/// This intrinsic is a utility function and does not correspond to a specific\n"
10481"/// instruction.\n"
10482"///\n"
10483"/// \\param __a\n"
10484"/// A 256-bit vector of [4 x double].\n"
10485"/// \\returns A 64 bit double containing the first element of the input vector.\n"
10486"static __inline double __DEFAULT_FN_ATTRS\n"
10487"_mm256_cvtsd_f64(__m256d __a)\n"
10488"{\n"
10489" return __a[0];\n"
10490"}\n"
10491"\n"
10492"/// Returns the first element of the input vector of [8 x i32].\n"
10493"///\n"
10494"/// \\headerfile <avxintrin.h>\n"
10495"///\n"
10496"/// This intrinsic is a utility function and does not correspond to a specific\n"
10497"/// instruction.\n"
10498"///\n"
10499"/// \\param __a\n"
10500"/// A 256-bit vector of [8 x i32].\n"
10501"/// \\returns A 32 bit integer containing the first element of the input vector.\n"
10502"static __inline int __DEFAULT_FN_ATTRS\n"
10503"_mm256_cvtsi256_si32(__m256i __a)\n"
10504"{\n"
10505" __v8si __b = (__v8si)__a;\n"
10506" return __b[0];\n"
10507"}\n"
10508"\n"
10509"/// Returns the first element of the input vector of [8 x float].\n"
10510"///\n"
10511"/// \\headerfile <avxintrin.h>\n"
10512"///\n"
10513"/// This intrinsic is a utility function and does not correspond to a specific\n"
10514"/// instruction.\n"
10515"///\n"
10516"/// \\param __a\n"
10517"/// A 256-bit vector of [8 x float].\n"
10518"/// \\returns A 32 bit float containing the first element of the input vector.\n"
10519"static __inline float __DEFAULT_FN_ATTRS\n"
10520"_mm256_cvtss_f32(__m256 __a)\n"
10521"{\n"
10522" return __a[0];\n"
10523"}\n"
10524"\n"
10525"/* Vector replicate */\n"
10526"/// Moves and duplicates odd-indexed values from a 256-bit vector of\n"
10527"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10528"///\n"
10529"/// \\headerfile <x86intrin.h>\n"
10530"///\n"
10531"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
10532"///\n"
10533"/// \\param __a\n"
10534"/// A 256-bit vector of [8 x float]. \\n\n"
10535"/// Bits [255:224] of \\a __a are written to bits [255:224] and [223:192] of\n"
10536"/// the return value. \\n\n"
10537"/// Bits [191:160] of \\a __a are written to bits [191:160] and [159:128] of\n"
10538"/// the return value. \\n\n"
10539"/// Bits [127:96] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10540"/// return value. \\n\n"
10541"/// Bits [63:32] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10542"/// return value.\n"
10543"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10544"/// values.\n"
10545"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10546"_mm256_movehdup_ps(__m256 __a)\n"
10547"{\n"
10548" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);\n"
10549"}\n"
10550"\n"
10551"/// Moves and duplicates even-indexed values from a 256-bit vector of\n"
10552"/// [8 x float] to float values in a 256-bit vector of [8 x float].\n"
10553"///\n"
10554"/// \\headerfile <x86intrin.h>\n"
10555"///\n"
10556"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
10557"///\n"
10558"/// \\param __a\n"
10559"/// A 256-bit vector of [8 x float]. \\n\n"
10560"/// Bits [223:192] of \\a __a are written to bits [255:224] and [223:192] of\n"
10561"/// the return value. \\n\n"
10562"/// Bits [159:128] of \\a __a are written to bits [191:160] and [159:128] of\n"
10563"/// the return value. \\n\n"
10564"/// Bits [95:64] of \\a __a are written to bits [127:96] and [95:64] of the\n"
10565"/// return value. \\n\n"
10566"/// Bits [31:0] of \\a __a are written to bits [63:32] and [31:0] of the\n"
10567"/// return value.\n"
10568"/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n"
10569"/// values.\n"
10570"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10571"_mm256_moveldup_ps(__m256 __a)\n"
10572"{\n"
10573" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);\n"
10574"}\n"
10575"\n"
10576"/// Moves and duplicates double-precision floating point values from a\n"
10577"/// 256-bit vector of [4 x double] to double-precision values in a 256-bit\n"
10578"/// vector of [4 x double].\n"
10579"///\n"
10580"/// \\headerfile <x86intrin.h>\n"
10581"///\n"
10582"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
10583"///\n"
10584"/// \\param __a\n"
10585"/// A 256-bit vector of [4 x double]. \\n\n"
10586"/// Bits [63:0] of \\a __a are written to bits [127:64] and [63:0] of the\n"
10587"/// return value. \\n\n"
10588"/// Bits [191:128] of \\a __a are written to bits [255:192] and [191:128] of\n"
10589"/// the return value.\n"
10590"/// \\returns A 256-bit vector of [4 x double] containing the moved and\n"
10591"/// duplicated values.\n"
10592"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10593"_mm256_movedup_pd(__m256d __a)\n"
10594"{\n"
10595" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);\n"
10596"}\n"
10597"\n"
10598"/* Unpack and Interleave */\n"
10599"/// Unpacks the odd-indexed vector elements from two 256-bit vectors of\n"
10600"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10601"///\n"
10602"/// \\headerfile <x86intrin.h>\n"
10603"///\n"
10604"/// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.\n"
10605"///\n"
10606"/// \\param __a\n"
10607"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10608"/// Bits [127:64] are written to bits [63:0] of the return value. \\n\n"
10609"/// Bits [255:192] are written to bits [191:128] of the return value. \\n\n"
10610"/// \\param __b\n"
10611"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10612"/// Bits [127:64] are written to bits [127:64] of the return value. \\n\n"
10613"/// Bits [255:192] are written to bits [255:192] of the return value. \\n\n"
10614"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10615"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10616"_mm256_unpackhi_pd(__m256d __a, __m256d __b)\n"
10617"{\n"
10618" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);\n"
10619"}\n"
10620"\n"
10621"/// Unpacks the even-indexed vector elements from two 256-bit vectors of\n"
10622"/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n"
10623"///\n"
10624"/// \\headerfile <x86intrin.h>\n"
10625"///\n"
10626"/// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.\n"
10627"///\n"
10628"/// \\param __a\n"
10629"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10630"/// Bits [63:0] are written to bits [63:0] of the return value. \\n\n"
10631"/// Bits [191:128] are written to bits [191:128] of the return value.\n"
10632"/// \\param __b\n"
10633"/// A 256-bit floating-point vector of [4 x double]. \\n\n"
10634"/// Bits [63:0] are written to bits [127:64] of the return value. \\n\n"
10635"/// Bits [191:128] are written to bits [255:192] of the return value. \\n\n"
10636"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
10637"static __inline __m256d __DEFAULT_FN_ATTRS\n"
10638"_mm256_unpacklo_pd(__m256d __a, __m256d __b)\n"
10639"{\n"
10640" return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);\n"
10641"}\n"
10642"\n"
10643"/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the\n"
10644"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10645"/// vector of [8 x float].\n"
10646"///\n"
10647"/// \\headerfile <x86intrin.h>\n"
10648"///\n"
10649"/// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.\n"
10650"///\n"
10651"/// \\param __a\n"
10652"/// A 256-bit vector of [8 x float]. \\n\n"
10653"/// Bits [95:64] are written to bits [31:0] of the return value. \\n\n"
10654"/// Bits [127:96] are written to bits [95:64] of the return value. \\n\n"
10655"/// Bits [223:192] are written to bits [159:128] of the return value. \\n\n"
10656"/// Bits [255:224] are written to bits [223:192] of the return value.\n"
10657"/// \\param __b\n"
10658"/// A 256-bit vector of [8 x float]. \\n\n"
10659"/// Bits [95:64] are written to bits [63:32] of the return value. \\n\n"
10660"/// Bits [127:96] are written to bits [127:96] of the return value. \\n\n"
10661"/// Bits [223:192] are written to bits [191:160] of the return value. \\n\n"
10662"/// Bits [255:224] are written to bits [255:224] of the return value.\n"
10663"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10664"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10665"_mm256_unpackhi_ps(__m256 __a, __m256 __b)\n"
10666"{\n"
10667" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);\n"
10668"}\n"
10669"\n"
10670"/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the\n"
10671"/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n"
10672"/// vector of [8 x float].\n"
10673"///\n"
10674"/// \\headerfile <x86intrin.h>\n"
10675"///\n"
10676"/// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.\n"
10677"///\n"
10678"/// \\param __a\n"
10679"/// A 256-bit vector of [8 x float]. \\n\n"
10680"/// Bits [31:0] are written to bits [31:0] of the return value. \\n\n"
10681"/// Bits [63:32] are written to bits [95:64] of the return value. \\n\n"
10682"/// Bits [159:128] are written to bits [159:128] of the return value. \\n\n"
10683"/// Bits [191:160] are written to bits [223:192] of the return value.\n"
10684"/// \\param __b\n"
10685"/// A 256-bit vector of [8 x float]. \\n\n"
10686"/// Bits [31:0] are written to bits [63:32] of the return value. \\n\n"
10687"/// Bits [63:32] are written to bits [127:96] of the return value. \\n\n"
10688"/// Bits [159:128] are written to bits [191:160] of the return value. \\n\n"
10689"/// Bits [191:160] are written to bits [255:224] of the return value.\n"
10690"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
10691"static __inline __m256 __DEFAULT_FN_ATTRS\n"
10692"_mm256_unpacklo_ps(__m256 __a, __m256 __b)\n"
10693"{\n"
10694" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);\n"
10695"}\n"
10696"\n"
10697"/* Bit Test */\n"
10698"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10699"/// element-by-element comparison of the double-precision element in the\n"
10700"/// first source vector and the corresponding element in the second source\n"
10701"/// vector.\n"
10702"///\n"
10703"/// The EFLAGS register is updated as follows: \\n\n"
10704"/// If there is at least one pair of double-precision elements where the\n"
10705"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10706"/// ZF flag is set to 1. \\n\n"
10707"/// If there is at least one pair of double-precision elements where the\n"
10708"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10709"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10710"/// This intrinsic returns the value of the ZF flag.\n"
10711"///\n"
10712"/// \\headerfile <x86intrin.h>\n"
10713"///\n"
10714"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10715"///\n"
10716"/// \\param __a\n"
10717"/// A 128-bit vector of [2 x double].\n"
10718"/// \\param __b\n"
10719"/// A 128-bit vector of [2 x double].\n"
10720"/// \\returns the ZF flag in the EFLAGS register.\n"
10721"static __inline int __DEFAULT_FN_ATTRS128\n"
10722"_mm_testz_pd(__m128d __a, __m128d __b)\n"
10723"{\n"
10724" return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);\n"
10725"}\n"
10726"\n"
10727"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10728"/// element-by-element comparison of the double-precision element in the\n"
10729"/// first source vector and the corresponding element in the second source\n"
10730"/// vector.\n"
10731"///\n"
10732"/// The EFLAGS register is updated as follows: \\n\n"
10733"/// If there is at least one pair of double-precision elements where the\n"
10734"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10735"/// ZF flag is set to 1. \\n\n"
10736"/// If there is at least one pair of double-precision elements where the\n"
10737"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10738"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10739"/// This intrinsic returns the value of the CF flag.\n"
10740"///\n"
10741"/// \\headerfile <x86intrin.h>\n"
10742"///\n"
10743"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10744"///\n"
10745"/// \\param __a\n"
10746"/// A 128-bit vector of [2 x double].\n"
10747"/// \\param __b\n"
10748"/// A 128-bit vector of [2 x double].\n"
10749"/// \\returns the CF flag in the EFLAGS register.\n"
10750"static __inline int __DEFAULT_FN_ATTRS128\n"
10751"_mm_testc_pd(__m128d __a, __m128d __b)\n"
10752"{\n"
10753" return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);\n"
10754"}\n"
10755"\n"
10756"/// Given two 128-bit floating-point vectors of [2 x double], perform an\n"
10757"/// element-by-element comparison of the double-precision element in the\n"
10758"/// first source vector and the corresponding element in the second source\n"
10759"/// vector.\n"
10760"///\n"
10761"/// The EFLAGS register is updated as follows: \\n\n"
10762"/// If there is at least one pair of double-precision elements where the\n"
10763"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10764"/// ZF flag is set to 1. \\n\n"
10765"/// If there is at least one pair of double-precision elements where the\n"
10766"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10767"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10768"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10769"/// otherwise it returns 0.\n"
10770"///\n"
10771"/// \\headerfile <x86intrin.h>\n"
10772"///\n"
10773"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10774"///\n"
10775"/// \\param __a\n"
10776"/// A 128-bit vector of [2 x double].\n"
10777"/// \\param __b\n"
10778"/// A 128-bit vector of [2 x double].\n"
10779"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10780"static __inline int __DEFAULT_FN_ATTRS128\n"
10781"_mm_testnzc_pd(__m128d __a, __m128d __b)\n"
10782"{\n"
10783" return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);\n"
10784"}\n"
10785"\n"
10786"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10787"/// element-by-element comparison of the single-precision element in the\n"
10788"/// first source vector and the corresponding element in the second source\n"
10789"/// vector.\n"
10790"///\n"
10791"/// The EFLAGS register is updated as follows: \\n\n"
10792"/// If there is at least one pair of single-precision elements where the\n"
10793"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10794"/// ZF flag is set to 1. \\n\n"
10795"/// If there is at least one pair of single-precision elements where the\n"
10796"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10797"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10798"/// This intrinsic returns the value of the ZF flag.\n"
10799"///\n"
10800"/// \\headerfile <x86intrin.h>\n"
10801"///\n"
10802"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10803"///\n"
10804"/// \\param __a\n"
10805"/// A 128-bit vector of [4 x float].\n"
10806"/// \\param __b\n"
10807"/// A 128-bit vector of [4 x float].\n"
10808"/// \\returns the ZF flag.\n"
10809"static __inline int __DEFAULT_FN_ATTRS128\n"
10810"_mm_testz_ps(__m128 __a, __m128 __b)\n"
10811"{\n"
10812" return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);\n"
10813"}\n"
10814"\n"
10815"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10816"/// element-by-element comparison of the single-precision element in the\n"
10817"/// first source vector and the corresponding element in the second source\n"
10818"/// vector.\n"
10819"///\n"
10820"/// The EFLAGS register is updated as follows: \\n\n"
10821"/// If there is at least one pair of single-precision elements where the\n"
10822"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10823"/// ZF flag is set to 1. \\n\n"
10824"/// If there is at least one pair of single-precision elements where the\n"
10825"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10826"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10827"/// This intrinsic returns the value of the CF flag.\n"
10828"///\n"
10829"/// \\headerfile <x86intrin.h>\n"
10830"///\n"
10831"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10832"///\n"
10833"/// \\param __a\n"
10834"/// A 128-bit vector of [4 x float].\n"
10835"/// \\param __b\n"
10836"/// A 128-bit vector of [4 x float].\n"
10837"/// \\returns the CF flag.\n"
10838"static __inline int __DEFAULT_FN_ATTRS128\n"
10839"_mm_testc_ps(__m128 __a, __m128 __b)\n"
10840"{\n"
10841" return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);\n"
10842"}\n"
10843"\n"
10844"/// Given two 128-bit floating-point vectors of [4 x float], perform an\n"
10845"/// element-by-element comparison of the single-precision element in the\n"
10846"/// first source vector and the corresponding element in the second source\n"
10847"/// vector.\n"
10848"///\n"
10849"/// The EFLAGS register is updated as follows: \\n\n"
10850"/// If there is at least one pair of single-precision elements where the\n"
10851"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10852"/// ZF flag is set to 1. \\n\n"
10853"/// If there is at least one pair of single-precision elements where the\n"
10854"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10855"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10856"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10857"/// otherwise it returns 0.\n"
10858"///\n"
10859"/// \\headerfile <x86intrin.h>\n"
10860"///\n"
10861"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10862"///\n"
10863"/// \\param __a\n"
10864"/// A 128-bit vector of [4 x float].\n"
10865"/// \\param __b\n"
10866"/// A 128-bit vector of [4 x float].\n"
10867"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10868"static __inline int __DEFAULT_FN_ATTRS128\n"
10869"_mm_testnzc_ps(__m128 __a, __m128 __b)\n"
10870"{\n"
10871" return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);\n"
10872"}\n"
10873"\n"
10874"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10875"/// element-by-element comparison of the double-precision elements in the\n"
10876"/// first source vector and the corresponding elements in the second source\n"
10877"/// vector.\n"
10878"///\n"
10879"/// The EFLAGS register is updated as follows: \\n\n"
10880"/// If there is at least one pair of double-precision elements where the\n"
10881"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10882"/// ZF flag is set to 1. \\n\n"
10883"/// If there is at least one pair of double-precision elements where the\n"
10884"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10885"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10886"/// This intrinsic returns the value of the ZF flag.\n"
10887"///\n"
10888"/// \\headerfile <x86intrin.h>\n"
10889"///\n"
10890"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10891"///\n"
10892"/// \\param __a\n"
10893"/// A 256-bit vector of [4 x double].\n"
10894"/// \\param __b\n"
10895"/// A 256-bit vector of [4 x double].\n"
10896"/// \\returns the ZF flag.\n"
10897"static __inline int __DEFAULT_FN_ATTRS\n"
10898"_mm256_testz_pd(__m256d __a, __m256d __b)\n"
10899"{\n"
10900" return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);\n"
10901"}\n"
10902"\n"
10903"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10904"/// element-by-element comparison of the double-precision elements in the\n"
10905"/// first source vector and the corresponding elements in the second source\n"
10906"/// vector.\n"
10907"///\n"
10908"/// The EFLAGS register is updated as follows: \\n\n"
10909"/// If there is at least one pair of double-precision elements where the\n"
10910"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10911"/// ZF flag is set to 1. \\n\n"
10912"/// If there is at least one pair of double-precision elements where the\n"
10913"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10914"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10915"/// This intrinsic returns the value of the CF flag.\n"
10916"///\n"
10917"/// \\headerfile <x86intrin.h>\n"
10918"///\n"
10919"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10920"///\n"
10921"/// \\param __a\n"
10922"/// A 256-bit vector of [4 x double].\n"
10923"/// \\param __b\n"
10924"/// A 256-bit vector of [4 x double].\n"
10925"/// \\returns the CF flag.\n"
10926"static __inline int __DEFAULT_FN_ATTRS\n"
10927"_mm256_testc_pd(__m256d __a, __m256d __b)\n"
10928"{\n"
10929" return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);\n"
10930"}\n"
10931"\n"
10932"/// Given two 256-bit floating-point vectors of [4 x double], perform an\n"
10933"/// element-by-element comparison of the double-precision elements in the\n"
10934"/// first source vector and the corresponding elements in the second source\n"
10935"/// vector.\n"
10936"///\n"
10937"/// The EFLAGS register is updated as follows: \\n\n"
10938"/// If there is at least one pair of double-precision elements where the\n"
10939"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10940"/// ZF flag is set to 1. \\n\n"
10941"/// If there is at least one pair of double-precision elements where the\n"
10942"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10943"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10944"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
10945"/// otherwise it returns 0.\n"
10946"///\n"
10947"/// \\headerfile <x86intrin.h>\n"
10948"///\n"
10949"/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n"
10950"///\n"
10951"/// \\param __a\n"
10952"/// A 256-bit vector of [4 x double].\n"
10953"/// \\param __b\n"
10954"/// A 256-bit vector of [4 x double].\n"
10955"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
10956"static __inline int __DEFAULT_FN_ATTRS\n"
10957"_mm256_testnzc_pd(__m256d __a, __m256d __b)\n"
10958"{\n"
10959" return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);\n"
10960"}\n"
10961"\n"
10962"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10963"/// element-by-element comparison of the single-precision element in the\n"
10964"/// first source vector and the corresponding element in the second source\n"
10965"/// vector.\n"
10966"///\n"
10967"/// The EFLAGS register is updated as follows: \\n\n"
10968"/// If there is at least one pair of single-precision elements where the\n"
10969"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10970"/// ZF flag is set to 1. \\n\n"
10971"/// If there is at least one pair of single-precision elements where the\n"
10972"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
10973"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
10974"/// This intrinsic returns the value of the ZF flag.\n"
10975"///\n"
10976"/// \\headerfile <x86intrin.h>\n"
10977"///\n"
10978"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
10979"///\n"
10980"/// \\param __a\n"
10981"/// A 256-bit vector of [8 x float].\n"
10982"/// \\param __b\n"
10983"/// A 256-bit vector of [8 x float].\n"
10984"/// \\returns the ZF flag.\n"
10985"static __inline int __DEFAULT_FN_ATTRS\n"
10986"_mm256_testz_ps(__m256 __a, __m256 __b)\n"
10987"{\n"
10988" return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);\n"
10989"}\n"
10990"\n"
10991"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
10992"/// element-by-element comparison of the single-precision element in the\n"
10993"/// first source vector and the corresponding element in the second source\n"
10994"/// vector.\n"
10995"///\n"
10996"/// The EFLAGS register is updated as follows: \\n\n"
10997"/// If there is at least one pair of single-precision elements where the\n"
10998"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
10999"/// ZF flag is set to 1. \\n\n"
11000"/// If there is at least one pair of single-precision elements where the\n"
11001"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11002"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11003"/// This intrinsic returns the value of the CF flag.\n"
11004"///\n"
11005"/// \\headerfile <x86intrin.h>\n"
11006"///\n"
11007"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11008"///\n"
11009"/// \\param __a\n"
11010"/// A 256-bit vector of [8 x float].\n"
11011"/// \\param __b\n"
11012"/// A 256-bit vector of [8 x float].\n"
11013"/// \\returns the CF flag.\n"
11014"static __inline int __DEFAULT_FN_ATTRS\n"
11015"_mm256_testc_ps(__m256 __a, __m256 __b)\n"
11016"{\n"
11017" return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);\n"
11018"}\n"
11019"\n"
11020"/// Given two 256-bit floating-point vectors of [8 x float], perform an\n"
11021"/// element-by-element comparison of the single-precision elements in the\n"
11022"/// first source vector and the corresponding elements in the second source\n"
11023"/// vector.\n"
11024"///\n"
11025"/// The EFLAGS register is updated as follows: \\n\n"
11026"/// If there is at least one pair of single-precision elements where the\n"
11027"/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n"
11028"/// ZF flag is set to 1. \\n\n"
11029"/// If there is at least one pair of single-precision elements where the\n"
11030"/// sign-bit of the first element is 0 and the sign-bit of the second element\n"
11031"/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11032"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11033"/// otherwise it returns 0.\n"
11034"///\n"
11035"/// \\headerfile <x86intrin.h>\n"
11036"///\n"
11037"/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n"
11038"///\n"
11039"/// \\param __a\n"
11040"/// A 256-bit vector of [8 x float].\n"
11041"/// \\param __b\n"
11042"/// A 256-bit vector of [8 x float].\n"
11043"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11044"static __inline int __DEFAULT_FN_ATTRS\n"
11045"_mm256_testnzc_ps(__m256 __a, __m256 __b)\n"
11046"{\n"
11047" return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);\n"
11048"}\n"
11049"\n"
11050"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11051"/// of the two source vectors.\n"
11052"///\n"
11053"/// The EFLAGS register is updated as follows: \\n\n"
11054"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11055"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11056"/// If there is at least one pair of bits where the bit from the first source\n"
11057"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11058"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11059"/// This intrinsic returns the value of the ZF flag.\n"
11060"///\n"
11061"/// \\headerfile <x86intrin.h>\n"
11062"///\n"
11063"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11064"///\n"
11065"/// \\param __a\n"
11066"/// A 256-bit integer vector.\n"
11067"/// \\param __b\n"
11068"/// A 256-bit integer vector.\n"
11069"/// \\returns the ZF flag.\n"
11070"static __inline int __DEFAULT_FN_ATTRS\n"
11071"_mm256_testz_si256(__m256i __a, __m256i __b)\n"
11072"{\n"
11073" return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);\n"
11074"}\n"
11075"\n"
11076"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11077"/// of the two source vectors.\n"
11078"///\n"
11079"/// The EFLAGS register is updated as follows: \\n\n"
11080"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11081"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11082"/// If there is at least one pair of bits where the bit from the first source\n"
11083"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11084"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11085"/// This intrinsic returns the value of the CF flag.\n"
11086"///\n"
11087"/// \\headerfile <x86intrin.h>\n"
11088"///\n"
11089"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11090"///\n"
11091"/// \\param __a\n"
11092"/// A 256-bit integer vector.\n"
11093"/// \\param __b\n"
11094"/// A 256-bit integer vector.\n"
11095"/// \\returns the CF flag.\n"
11096"static __inline int __DEFAULT_FN_ATTRS\n"
11097"_mm256_testc_si256(__m256i __a, __m256i __b)\n"
11098"{\n"
11099" return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);\n"
11100"}\n"
11101"\n"
11102"/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n"
11103"/// of the two source vectors.\n"
11104"///\n"
11105"/// The EFLAGS register is updated as follows: \\n\n"
11106"/// If there is at least one pair of bits where both bits are 1, the ZF flag\n"
11107"/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n"
11108"/// If there is at least one pair of bits where the bit from the first source\n"
11109"/// vector is 0 and the bit from the second source vector is 1, the CF flag\n"
11110"/// is set to 0. Otherwise the CF flag is set to 1. \\n\n"
11111"/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n"
11112"/// otherwise it returns 0.\n"
11113"///\n"
11114"/// \\headerfile <x86intrin.h>\n"
11115"///\n"
11116"/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n"
11117"///\n"
11118"/// \\param __a\n"
11119"/// A 256-bit integer vector.\n"
11120"/// \\param __b\n"
11121"/// A 256-bit integer vector.\n"
11122"/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n"
11123"static __inline int __DEFAULT_FN_ATTRS\n"
11124"_mm256_testnzc_si256(__m256i __a, __m256i __b)\n"
11125"{\n"
11126" return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);\n"
11127"}\n"
11128"\n"
11129"/* Vector extract sign mask */\n"
11130"/// Extracts the sign bits of double-precision floating point elements\n"
11131"/// in a 256-bit vector of [4 x double] and writes them to the lower order\n"
11132"/// bits of the return value.\n"
11133"///\n"
11134"/// \\headerfile <x86intrin.h>\n"
11135"///\n"
11136"/// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.\n"
11137"///\n"
11138"/// \\param __a\n"
11139"/// A 256-bit vector of [4 x double] containing the double-precision\n"
11140"/// floating point values with sign bits to be extracted.\n"
11141"/// \\returns The sign bits from the operand, written to bits [3:0].\n"
11142"static __inline int __DEFAULT_FN_ATTRS\n"
11143"_mm256_movemask_pd(__m256d __a)\n"
11144"{\n"
11145" return __builtin_ia32_movmskpd256((__v4df)__a);\n"
11146"}\n"
11147"\n"
11148"/// Extracts the sign bits of single-precision floating point elements\n"
11149"/// in a 256-bit vector of [8 x float] and writes them to the lower order\n"
11150"/// bits of the return value.\n"
11151"///\n"
11152"/// \\headerfile <x86intrin.h>\n"
11153"///\n"
11154"/// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.\n"
11155"///\n"
11156"/// \\param __a\n"
11157"/// A 256-bit vector of [8 x float] containing the single-precision floating\n"
11158"/// point values with sign bits to be extracted.\n"
11159"/// \\returns The sign bits from the operand, written to bits [7:0].\n"
11160"static __inline int __DEFAULT_FN_ATTRS\n"
11161"_mm256_movemask_ps(__m256 __a)\n"
11162"{\n"
11163" return __builtin_ia32_movmskps256((__v8sf)__a);\n"
11164"}\n"
11165"\n"
11166"/* Vector __zero */\n"
11167"/// Zeroes the contents of all XMM or YMM registers.\n"
11168"///\n"
11169"/// \\headerfile <x86intrin.h>\n"
11170"///\n"
11171"/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.\n"
11172"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11173"_mm256_zeroall(void)\n"
11174"{\n"
11175" __builtin_ia32_vzeroall();\n"
11176"}\n"
11177"\n"
11178"/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.\n"
11179"///\n"
11180"/// \\headerfile <x86intrin.h>\n"
11181"///\n"
11182"/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.\n"
11183"static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n"
11184"_mm256_zeroupper(void)\n"
11185"{\n"
11186" __builtin_ia32_vzeroupper();\n"
11187"}\n"
11188"\n"
11189"/* Vector load with broadcast */\n"
11190"/// Loads a scalar single-precision floating point value from the\n"
11191"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11192"/// of a [4 x float] vector.\n"
11193"///\n"
11194"/// \\headerfile <x86intrin.h>\n"
11195"///\n"
11196"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11197"///\n"
11198"/// \\param __a\n"
11199"/// The single-precision floating point value to be broadcast.\n"
11200"/// \\returns A 128-bit vector of [4 x float] whose 32-bit elements are set\n"
11201"/// equal to the broadcast value.\n"
11202"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11203"_mm_broadcast_ss(float const *__a)\n"
11204"{\n"
11205" float __f = *__a;\n"
11206" return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f };\n"
11207"}\n"
11208"\n"
11209"/// Loads a scalar double-precision floating point value from the\n"
11210"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11211"/// of a [4 x double] vector.\n"
11212"///\n"
11213"/// \\headerfile <x86intrin.h>\n"
11214"///\n"
11215"/// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.\n"
11216"///\n"
11217"/// \\param __a\n"
11218"/// The double-precision floating point value to be broadcast.\n"
11219"/// \\returns A 256-bit vector of [4 x double] whose 64-bit elements are set\n"
11220"/// equal to the broadcast value.\n"
11221"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11222"_mm256_broadcast_sd(double const *__a)\n"
11223"{\n"
11224" double __d = *__a;\n"
11225" return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };\n"
11226"}\n"
11227"\n"
11228"/// Loads a scalar single-precision floating point value from the\n"
11229"/// specified address pointed to by \\a __a and broadcasts it to the elements\n"
11230"/// of a [8 x float] vector.\n"
11231"///\n"
11232"/// \\headerfile <x86intrin.h>\n"
11233"///\n"
11234"/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n"
11235"///\n"
11236"/// \\param __a\n"
11237"/// The single-precision floating point value to be broadcast.\n"
11238"/// \\returns A 256-bit vector of [8 x float] whose 32-bit elements are set\n"
11239"/// equal to the broadcast value.\n"
11240"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11241"_mm256_broadcast_ss(float const *__a)\n"
11242"{\n"
11243" float __f = *__a;\n"
11244" return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };\n"
11245"}\n"
11246"\n"
11247"/// Loads the data from a 128-bit vector of [2 x double] from the\n"
11248"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11249"/// elements in a 256-bit vector of [4 x double].\n"
11250"///\n"
11251"/// \\headerfile <x86intrin.h>\n"
11252"///\n"
11253"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11254"///\n"
11255"/// \\param __a\n"
11256"/// The 128-bit vector of [2 x double] to be broadcast.\n"
11257"/// \\returns A 256-bit vector of [4 x double] whose 128-bit elements are set\n"
11258"/// equal to the broadcast value.\n"
11259"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11260"_mm256_broadcast_pd(__m128d const *__a)\n"
11261"{\n"
11262" __m128d __b = _mm_loadu_pd((const double *)__a);\n"
11263" return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,\n"
11264" 0, 1, 0, 1);\n"
11265"}\n"
11266"\n"
11267"/// Loads the data from a 128-bit vector of [4 x float] from the\n"
11268"/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n"
11269"/// elements in a 256-bit vector of [8 x float].\n"
11270"///\n"
11271"/// \\headerfile <x86intrin.h>\n"
11272"///\n"
11273"/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n"
11274"///\n"
11275"/// \\param __a\n"
11276"/// The 128-bit vector of [4 x float] to be broadcast.\n"
11277"/// \\returns A 256-bit vector of [8 x float] whose 128-bit elements are set\n"
11278"/// equal to the broadcast value.\n"
11279"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11280"_mm256_broadcast_ps(__m128 const *__a)\n"
11281"{\n"
11282" __m128 __b = _mm_loadu_ps((const float *)__a);\n"
11283" return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,\n"
11284" 0, 1, 2, 3, 0, 1, 2, 3);\n"
11285"}\n"
11286"\n"
11287"/* SIMD load ops */\n"
11288"/// Loads 4 double-precision floating point values from a 32-byte aligned\n"
11289"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11290"///\n"
11291"/// \\headerfile <x86intrin.h>\n"
11292"///\n"
11293"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11294"///\n"
11295"/// \\param __p\n"
11296"/// A 32-byte aligned pointer to a memory location containing\n"
11297"/// double-precision floating point values.\n"
11298"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11299"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11300"_mm256_load_pd(double const *__p)\n"
11301"{\n"
11302" return *(__m256d *)__p;\n"
11303"}\n"
11304"\n"
11305"/// Loads 8 single-precision floating point values from a 32-byte aligned\n"
11306"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11307"///\n"
11308"/// \\headerfile <x86intrin.h>\n"
11309"///\n"
11310"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11311"///\n"
11312"/// \\param __p\n"
11313"/// A 32-byte aligned pointer to a memory location containing float values.\n"
11314"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11315"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11316"_mm256_load_ps(float const *__p)\n"
11317"{\n"
11318" return *(__m256 *)__p;\n"
11319"}\n"
11320"\n"
11321"/// Loads 4 double-precision floating point values from an unaligned\n"
11322"/// memory location pointed to by \\a __p into a vector of [4 x double].\n"
11323"///\n"
11324"/// \\headerfile <x86intrin.h>\n"
11325"///\n"
11326"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11327"///\n"
11328"/// \\param __p\n"
11329"/// A pointer to a memory location containing double-precision floating\n"
11330"/// point values.\n"
11331"/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n"
11332"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11333"_mm256_loadu_pd(double const *__p)\n"
11334"{\n"
11335" struct __loadu_pd {\n"
11336" __m256d __v;\n"
11337" } __attribute__((__packed__, __may_alias__));\n"
11338" return ((struct __loadu_pd*)__p)->__v;\n"
11339"}\n"
11340"\n"
11341"/// Loads 8 single-precision floating point values from an unaligned\n"
11342"/// memory location pointed to by \\a __p into a vector of [8 x float].\n"
11343"///\n"
11344"/// \\headerfile <x86intrin.h>\n"
11345"///\n"
11346"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11347"///\n"
11348"/// \\param __p\n"
11349"/// A pointer to a memory location containing single-precision floating\n"
11350"/// point values.\n"
11351"/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n"
11352"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11353"_mm256_loadu_ps(float const *__p)\n"
11354"{\n"
11355" struct __loadu_ps {\n"
11356" __m256 __v;\n"
11357" } __attribute__((__packed__, __may_alias__));\n"
11358" return ((struct __loadu_ps*)__p)->__v;\n"
11359"}\n"
11360"\n"
11361"/// Loads 256 bits of integer data from a 32-byte aligned memory\n"
11362"/// location pointed to by \\a __p into elements of a 256-bit integer vector.\n"
11363"///\n"
11364"/// \\headerfile <x86intrin.h>\n"
11365"///\n"
11366"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11367"///\n"
11368"/// \\param __p\n"
11369"/// A 32-byte aligned pointer to a 256-bit integer vector containing integer\n"
11370"/// values.\n"
11371"/// \\returns A 256-bit integer vector containing the moved values.\n"
11372"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11373"_mm256_load_si256(__m256i const *__p)\n"
11374"{\n"
11375" return *__p;\n"
11376"}\n"
11377"\n"
11378"/// Loads 256 bits of integer data from an unaligned memory location\n"
11379"/// pointed to by \\a __p into a 256-bit integer vector.\n"
11380"///\n"
11381"/// \\headerfile <x86intrin.h>\n"
11382"///\n"
11383"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11384"///\n"
11385"/// \\param __p\n"
11386"/// A pointer to a 256-bit integer vector containing integer values.\n"
11387"/// \\returns A 256-bit integer vector containing the moved values.\n"
11388"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11389"_mm256_loadu_si256(__m256i const *__p)\n"
11390"{\n"
11391" struct __loadu_si256 {\n"
11392" __m256i __v;\n"
11393" } __attribute__((__packed__, __may_alias__));\n"
11394" return ((struct __loadu_si256*)__p)->__v;\n"
11395"}\n"
11396"\n"
11397"/// Loads 256 bits of integer data from an unaligned memory location\n"
11398"/// pointed to by \\a __p into a 256-bit integer vector. This intrinsic may\n"
11399"/// perform better than \\c _mm256_loadu_si256 when the data crosses a cache\n"
11400"/// line boundary.\n"
11401"///\n"
11402"/// \\headerfile <x86intrin.h>\n"
11403"///\n"
11404"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
11405"///\n"
11406"/// \\param __p\n"
11407"/// A pointer to a 256-bit integer vector containing integer values.\n"
11408"/// \\returns A 256-bit integer vector containing the moved values.\n"
11409"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11410"_mm256_lddqu_si256(__m256i const *__p)\n"
11411"{\n"
11412" return (__m256i)__builtin_ia32_lddqu256((char const *)__p);\n"
11413"}\n"
11414"\n"
11415"/* SIMD store ops */\n"
11416"/// Stores double-precision floating point values from a 256-bit vector\n"
11417"/// of [4 x double] to a 32-byte aligned memory location pointed to by\n"
11418"/// \\a __p.\n"
11419"///\n"
11420"/// \\headerfile <x86intrin.h>\n"
11421"///\n"
11422"/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n"
11423"///\n"
11424"/// \\param __p\n"
11425"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11426"/// double-precision floaing point values.\n"
11427"/// \\param __a\n"
11428"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11429"static __inline void __DEFAULT_FN_ATTRS\n"
11430"_mm256_store_pd(double *__p, __m256d __a)\n"
11431"{\n"
11432" *(__m256d *)__p = __a;\n"
11433"}\n"
11434"\n"
11435"/// Stores single-precision floating point values from a 256-bit vector\n"
11436"/// of [8 x float] to a 32-byte aligned memory location pointed to by \\a __p.\n"
11437"///\n"
11438"/// \\headerfile <x86intrin.h>\n"
11439"///\n"
11440"/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n"
11441"///\n"
11442"/// \\param __p\n"
11443"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11444"/// float values.\n"
11445"/// \\param __a\n"
11446"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11447"static __inline void __DEFAULT_FN_ATTRS\n"
11448"_mm256_store_ps(float *__p, __m256 __a)\n"
11449"{\n"
11450" *(__m256 *)__p = __a;\n"
11451"}\n"
11452"\n"
11453"/// Stores double-precision floating point values from a 256-bit vector\n"
11454"/// of [4 x double] to an unaligned memory location pointed to by \\a __p.\n"
11455"///\n"
11456"/// \\headerfile <x86intrin.h>\n"
11457"///\n"
11458"/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n"
11459"///\n"
11460"/// \\param __p\n"
11461"/// A pointer to a memory location that will receive the double-precision\n"
11462"/// floating point values.\n"
11463"/// \\param __a\n"
11464"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11465"static __inline void __DEFAULT_FN_ATTRS\n"
11466"_mm256_storeu_pd(double *__p, __m256d __a)\n"
11467"{\n"
11468" struct __storeu_pd {\n"
11469" __m256d __v;\n"
11470" } __attribute__((__packed__, __may_alias__));\n"
11471" ((struct __storeu_pd*)__p)->__v = __a;\n"
11472"}\n"
11473"\n"
11474"/// Stores single-precision floating point values from a 256-bit vector\n"
11475"/// of [8 x float] to an unaligned memory location pointed to by \\a __p.\n"
11476"///\n"
11477"/// \\headerfile <x86intrin.h>\n"
11478"///\n"
11479"/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n"
11480"///\n"
11481"/// \\param __p\n"
11482"/// A pointer to a memory location that will receive the float values.\n"
11483"/// \\param __a\n"
11484"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11485"static __inline void __DEFAULT_FN_ATTRS\n"
11486"_mm256_storeu_ps(float *__p, __m256 __a)\n"
11487"{\n"
11488" struct __storeu_ps {\n"
11489" __m256 __v;\n"
11490" } __attribute__((__packed__, __may_alias__));\n"
11491" ((struct __storeu_ps*)__p)->__v = __a;\n"
11492"}\n"
11493"\n"
11494"/// Stores integer values from a 256-bit integer vector to a 32-byte\n"
11495"/// aligned memory location pointed to by \\a __p.\n"
11496"///\n"
11497"/// \\headerfile <x86intrin.h>\n"
11498"///\n"
11499"/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n"
11500"///\n"
11501"/// \\param __p\n"
11502"/// A 32-byte aligned pointer to a memory location that will receive the\n"
11503"/// integer values.\n"
11504"/// \\param __a\n"
11505"/// A 256-bit integer vector containing the values to be moved.\n"
11506"static __inline void __DEFAULT_FN_ATTRS\n"
11507"_mm256_store_si256(__m256i *__p, __m256i __a)\n"
11508"{\n"
11509" *__p = __a;\n"
11510"}\n"
11511"\n"
11512"/// Stores integer values from a 256-bit integer vector to an unaligned\n"
11513"/// memory location pointed to by \\a __p.\n"
11514"///\n"
11515"/// \\headerfile <x86intrin.h>\n"
11516"///\n"
11517"/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n"
11518"///\n"
11519"/// \\param __p\n"
11520"/// A pointer to a memory location that will receive the integer values.\n"
11521"/// \\param __a\n"
11522"/// A 256-bit integer vector containing the values to be moved.\n"
11523"static __inline void __DEFAULT_FN_ATTRS\n"
11524"_mm256_storeu_si256(__m256i *__p, __m256i __a)\n"
11525"{\n"
11526" struct __storeu_si256 {\n"
11527" __m256i __v;\n"
11528" } __attribute__((__packed__, __may_alias__));\n"
11529" ((struct __storeu_si256*)__p)->__v = __a;\n"
11530"}\n"
11531"\n"
11532"/* Conditional load ops */\n"
11533"/// Conditionally loads double-precision floating point elements from a\n"
11534"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11535"/// [2 x double], depending on the mask bits associated with each data\n"
11536"/// element.\n"
11537"///\n"
11538"/// \\headerfile <x86intrin.h>\n"
11539"///\n"
11540"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11541"///\n"
11542"/// \\param __p\n"
11543"/// A pointer to a memory location that contains the double-precision\n"
11544"/// floating point values.\n"
11545"/// \\param __m\n"
11546"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11547"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11548"/// corresponding value in the memory location is not loaded and the\n"
11549"/// corresponding field in the return value is set to zero.\n"
11550"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
11551"static __inline __m128d __DEFAULT_FN_ATTRS128\n"
11552"_mm_maskload_pd(double const *__p, __m128i __m)\n"
11553"{\n"
11554" return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);\n"
11555"}\n"
11556"\n"
11557"/// Conditionally loads double-precision floating point elements from a\n"
11558"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11559"/// [4 x double], depending on the mask bits associated with each data\n"
11560"/// element.\n"
11561"///\n"
11562"/// \\headerfile <x86intrin.h>\n"
11563"///\n"
11564"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11565"///\n"
11566"/// \\param __p\n"
11567"/// A pointer to a memory location that contains the double-precision\n"
11568"/// floating point values.\n"
11569"/// \\param __m\n"
11570"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11571"/// significant bit of each quadword element represents the mask bits. If a\n"
11572"/// mask bit is zero, the corresponding value in the memory location is not\n"
11573"/// loaded and the corresponding field in the return value is set to zero.\n"
11574"/// \\returns A 256-bit vector of [4 x double] containing the loaded values.\n"
11575"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11576"_mm256_maskload_pd(double const *__p, __m256i __m)\n"
11577"{\n"
11578" return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,\n"
11579" (__v4di)__m);\n"
11580"}\n"
11581"\n"
11582"/// Conditionally loads single-precision floating point elements from a\n"
11583"/// memory location pointed to by \\a __p into a 128-bit vector of\n"
11584"/// [4 x float], depending on the mask bits associated with each data\n"
11585"/// element.\n"
11586"///\n"
11587"/// \\headerfile <x86intrin.h>\n"
11588"///\n"
11589"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11590"///\n"
11591"/// \\param __p\n"
11592"/// A pointer to a memory location that contains the single-precision\n"
11593"/// floating point values.\n"
11594"/// \\param __m\n"
11595"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11596"/// each data element represents the mask bits. If a mask bit is zero, the\n"
11597"/// corresponding value in the memory location is not loaded and the\n"
11598"/// corresponding field in the return value is set to zero.\n"
11599"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
11600"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
11601"_mm_maskload_ps(float const *__p, __m128i __m)\n"
11602"{\n"
11603" return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);\n"
11604"}\n"
11605"\n"
11606"/// Conditionally loads single-precision floating point elements from a\n"
11607"/// memory location pointed to by \\a __p into a 256-bit vector of\n"
11608"/// [8 x float], depending on the mask bits associated with each data\n"
11609"/// element.\n"
11610"///\n"
11611"/// \\headerfile <x86intrin.h>\n"
11612"///\n"
11613"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11614"///\n"
11615"/// \\param __p\n"
11616"/// A pointer to a memory location that contains the single-precision\n"
11617"/// floating point values.\n"
11618"/// \\param __m\n"
11619"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11620"/// significant bit of each dword element represents the mask bits. If a mask\n"
11621"/// bit is zero, the corresponding value in the memory location is not loaded\n"
11622"/// and the corresponding field in the return value is set to zero.\n"
11623"/// \\returns A 256-bit vector of [8 x float] containing the loaded values.\n"
11624"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11625"_mm256_maskload_ps(float const *__p, __m256i __m)\n"
11626"{\n"
11627" return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);\n"
11628"}\n"
11629"\n"
11630"/* Conditional store ops */\n"
11631"/// Moves single-precision floating point values from a 256-bit vector\n"
11632"/// of [8 x float] to a memory location pointed to by \\a __p, according to\n"
11633"/// the specified mask.\n"
11634"///\n"
11635"/// \\headerfile <x86intrin.h>\n"
11636"///\n"
11637"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11638"///\n"
11639"/// \\param __p\n"
11640"/// A pointer to a memory location that will receive the float values.\n"
11641"/// \\param __m\n"
11642"/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n"
11643"/// significant bit of each dword element in the mask vector represents the\n"
11644"/// mask bits. If a mask bit is zero, the corresponding value from vector\n"
11645"/// \\a __a is not stored and the corresponding field in the memory location\n"
11646"/// pointed to by \\a __p is not changed.\n"
11647"/// \\param __a\n"
11648"/// A 256-bit vector of [8 x float] containing the values to be stored.\n"
11649"static __inline void __DEFAULT_FN_ATTRS\n"
11650"_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)\n"
11651"{\n"
11652" __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);\n"
11653"}\n"
11654"\n"
11655"/// Moves double-precision values from a 128-bit vector of [2 x double]\n"
11656"/// to a memory location pointed to by \\a __p, according to the specified\n"
11657"/// mask.\n"
11658"///\n"
11659"/// \\headerfile <x86intrin.h>\n"
11660"///\n"
11661"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11662"///\n"
11663"/// \\param __p\n"
11664"/// A pointer to a memory location that will receive the float values.\n"
11665"/// \\param __m\n"
11666"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11667"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11668"/// zero, the corresponding value from vector \\a __a is not stored and the\n"
11669"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11670"/// changed.\n"
11671"/// \\param __a\n"
11672"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
11673"static __inline void __DEFAULT_FN_ATTRS128\n"
11674"_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)\n"
11675"{\n"
11676" __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);\n"
11677"}\n"
11678"\n"
11679"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11680"/// to a memory location pointed to by \\a __p, according to the specified\n"
11681"/// mask.\n"
11682"///\n"
11683"/// \\headerfile <x86intrin.h>\n"
11684"///\n"
11685"/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n"
11686"///\n"
11687"/// \\param __p\n"
11688"/// A pointer to a memory location that will receive the float values.\n"
11689"/// \\param __m\n"
11690"/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n"
11691"/// significant bit of each quadword element in the mask vector represents\n"
11692"/// the mask bits. If a mask bit is zero, the corresponding value from vector\n"
11693"/// __a is not stored and the corresponding field in the memory location\n"
11694"/// pointed to by \\a __p is not changed.\n"
11695"/// \\param __a\n"
11696"/// A 256-bit vector of [4 x double] containing the values to be stored.\n"
11697"static __inline void __DEFAULT_FN_ATTRS\n"
11698"_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)\n"
11699"{\n"
11700" __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);\n"
11701"}\n"
11702"\n"
11703"/// Moves single-precision floating point values from a 128-bit vector\n"
11704"/// of [4 x float] to a memory location pointed to by \\a __p, according to\n"
11705"/// the specified mask.\n"
11706"///\n"
11707"/// \\headerfile <x86intrin.h>\n"
11708"///\n"
11709"/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n"
11710"///\n"
11711"/// \\param __p\n"
11712"/// A pointer to a memory location that will receive the float values.\n"
11713"/// \\param __m\n"
11714"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
11715"/// each field in the mask vector represents the mask bits. If a mask bit is\n"
11716"/// zero, the corresponding value from vector __a is not stored and the\n"
11717"/// corresponding field in the memory location pointed to by \\a __p is not\n"
11718"/// changed.\n"
11719"/// \\param __a\n"
11720"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
11721"static __inline void __DEFAULT_FN_ATTRS128\n"
11722"_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)\n"
11723"{\n"
11724" __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);\n"
11725"}\n"
11726"\n"
11727"/* Cacheability support ops */\n"
11728"/// Moves integer data from a 256-bit integer vector to a 32-byte\n"
11729"/// aligned memory location. To minimize caching, the data is flagged as\n"
11730"/// non-temporal (unlikely to be used again soon).\n"
11731"///\n"
11732"/// \\headerfile <x86intrin.h>\n"
11733"///\n"
11734"/// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.\n"
11735"///\n"
11736"/// \\param __a\n"
11737"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11738"/// integer values.\n"
11739"/// \\param __b\n"
11740"/// A 256-bit integer vector containing the values to be moved.\n"
11741"static __inline void __DEFAULT_FN_ATTRS\n"
11742"_mm256_stream_si256(__m256i *__a, __m256i __b)\n"
11743"{\n"
11744" typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n"
11745" __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);\n"
11746"}\n"
11747"\n"
11748"/// Moves double-precision values from a 256-bit vector of [4 x double]\n"
11749"/// to a 32-byte aligned memory location. To minimize caching, the data is\n"
11750"/// flagged as non-temporal (unlikely to be used again soon).\n"
11751"///\n"
11752"/// \\headerfile <x86intrin.h>\n"
11753"///\n"
11754"/// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.\n"
11755"///\n"
11756"/// \\param __a\n"
11757"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11758"/// double-precision floating-point values.\n"
11759"/// \\param __b\n"
11760"/// A 256-bit vector of [4 x double] containing the values to be moved.\n"
11761"static __inline void __DEFAULT_FN_ATTRS\n"
11762"_mm256_stream_pd(double *__a, __m256d __b)\n"
11763"{\n"
11764" typedef __v4df __v4df_aligned __attribute__((aligned(32)));\n"
11765" __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);\n"
11766"}\n"
11767"\n"
11768"/// Moves single-precision floating point values from a 256-bit vector\n"
11769"/// of [8 x float] to a 32-byte aligned memory location. To minimize\n"
11770"/// caching, the data is flagged as non-temporal (unlikely to be used again\n"
11771"/// soon).\n"
11772"///\n"
11773"/// \\headerfile <x86intrin.h>\n"
11774"///\n"
11775"/// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.\n"
11776"///\n"
11777"/// \\param __p\n"
11778"/// A pointer to a 32-byte aligned memory location that will receive the\n"
11779"/// single-precision floating point values.\n"
11780"/// \\param __a\n"
11781"/// A 256-bit vector of [8 x float] containing the values to be moved.\n"
11782"static __inline void __DEFAULT_FN_ATTRS\n"
11783"_mm256_stream_ps(float *__p, __m256 __a)\n"
11784"{\n"
11785" typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));\n"
11786" __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);\n"
11787"}\n"
11788"\n"
11789"/* Create vectors */\n"
11790"/// Create a 256-bit vector of [4 x double] with undefined values.\n"
11791"///\n"
11792"/// \\headerfile <x86intrin.h>\n"
11793"///\n"
11794"/// This intrinsic has no corresponding instruction.\n"
11795"///\n"
11796"/// \\returns A 256-bit vector of [4 x double] containing undefined values.\n"
11797"static __inline__ __m256d __DEFAULT_FN_ATTRS\n"
11798"_mm256_undefined_pd(void)\n"
11799"{\n"
11800" return (__m256d)__builtin_ia32_undef256();\n"
11801"}\n"
11802"\n"
11803"/// Create a 256-bit vector of [8 x float] with undefined values.\n"
11804"///\n"
11805"/// \\headerfile <x86intrin.h>\n"
11806"///\n"
11807"/// This intrinsic has no corresponding instruction.\n"
11808"///\n"
11809"/// \\returns A 256-bit vector of [8 x float] containing undefined values.\n"
11810"static __inline__ __m256 __DEFAULT_FN_ATTRS\n"
11811"_mm256_undefined_ps(void)\n"
11812"{\n"
11813" return (__m256)__builtin_ia32_undef256();\n"
11814"}\n"
11815"\n"
11816"/// Create a 256-bit integer vector with undefined values.\n"
11817"///\n"
11818"/// \\headerfile <x86intrin.h>\n"
11819"///\n"
11820"/// This intrinsic has no corresponding instruction.\n"
11821"///\n"
11822"/// \\returns A 256-bit integer vector containing undefined values.\n"
11823"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
11824"_mm256_undefined_si256(void)\n"
11825"{\n"
11826" return (__m256i)__builtin_ia32_undef256();\n"
11827"}\n"
11828"\n"
11829"/// Constructs a 256-bit floating-point vector of [4 x double]\n"
11830"/// initialized with the specified double-precision floating-point values.\n"
11831"///\n"
11832"/// \\headerfile <x86intrin.h>\n"
11833"///\n"
11834"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
11835"/// instruction.\n"
11836"///\n"
11837"/// \\param __a\n"
11838"/// A double-precision floating-point value used to initialize bits [255:192]\n"
11839"/// of the result.\n"
11840"/// \\param __b\n"
11841"/// A double-precision floating-point value used to initialize bits [191:128]\n"
11842"/// of the result.\n"
11843"/// \\param __c\n"
11844"/// A double-precision floating-point value used to initialize bits [127:64]\n"
11845"/// of the result.\n"
11846"/// \\param __d\n"
11847"/// A double-precision floating-point value used to initialize bits [63:0]\n"
11848"/// of the result.\n"
11849"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
11850"static __inline __m256d __DEFAULT_FN_ATTRS\n"
11851"_mm256_set_pd(double __a, double __b, double __c, double __d)\n"
11852"{\n"
11853" return __extension__ (__m256d){ __d, __c, __b, __a };\n"
11854"}\n"
11855"\n"
11856"/// Constructs a 256-bit floating-point vector of [8 x float] initialized\n"
11857"/// with the specified single-precision floating-point values.\n"
11858"///\n"
11859"/// \\headerfile <x86intrin.h>\n"
11860"///\n"
11861"/// This intrinsic is a utility function and does not correspond to a specific\n"
11862"/// instruction.\n"
11863"///\n"
11864"/// \\param __a\n"
11865"/// A single-precision floating-point value used to initialize bits [255:224]\n"
11866"/// of the result.\n"
11867"/// \\param __b\n"
11868"/// A single-precision floating-point value used to initialize bits [223:192]\n"
11869"/// of the result.\n"
11870"/// \\param __c\n"
11871"/// A single-precision floating-point value used to initialize bits [191:160]\n"
11872"/// of the result.\n"
11873"/// \\param __d\n"
11874"/// A single-precision floating-point value used to initialize bits [159:128]\n"
11875"/// of the result.\n"
11876"/// \\param __e\n"
11877"/// A single-precision floating-point value used to initialize bits [127:96]\n"
11878"/// of the result.\n"
11879"/// \\param __f\n"
11880"/// A single-precision floating-point value used to initialize bits [95:64]\n"
11881"/// of the result.\n"
11882"/// \\param __g\n"
11883"/// A single-precision floating-point value used to initialize bits [63:32]\n"
11884"/// of the result.\n"
11885"/// \\param __h\n"
11886"/// A single-precision floating-point value used to initialize bits [31:0]\n"
11887"/// of the result.\n"
11888"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
11889"static __inline __m256 __DEFAULT_FN_ATTRS\n"
11890"_mm256_set_ps(float __a, float __b, float __c, float __d,\n"
11891" float __e, float __f, float __g, float __h)\n"
11892"{\n"
11893" return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };\n"
11894"}\n"
11895"\n"
11896"/// Constructs a 256-bit integer vector initialized with the specified\n"
11897"/// 32-bit integral values.\n"
11898"///\n"
11899"/// \\headerfile <x86intrin.h>\n"
11900"///\n"
11901"/// This intrinsic is a utility function and does not correspond to a specific\n"
11902"/// instruction.\n"
11903"///\n"
11904"/// \\param __i0\n"
11905"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
11906"/// \\param __i1\n"
11907"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
11908"/// \\param __i2\n"
11909"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
11910"/// \\param __i3\n"
11911"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
11912"/// \\param __i4\n"
11913"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
11914"/// \\param __i5\n"
11915"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
11916"/// \\param __i6\n"
11917"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
11918"/// \\param __i7\n"
11919"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
11920"/// \\returns An initialized 256-bit integer vector.\n"
11921"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11922"_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,\n"
11923" int __i4, int __i5, int __i6, int __i7)\n"
11924"{\n"
11925" return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };\n"
11926"}\n"
11927"\n"
11928"/// Constructs a 256-bit integer vector initialized with the specified\n"
11929"/// 16-bit integral values.\n"
11930"///\n"
11931"/// \\headerfile <x86intrin.h>\n"
11932"///\n"
11933"/// This intrinsic is a utility function and does not correspond to a specific\n"
11934"/// instruction.\n"
11935"///\n"
11936"/// \\param __w15\n"
11937"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
11938"/// \\param __w14\n"
11939"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
11940"/// \\param __w13\n"
11941"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
11942"/// \\param __w12\n"
11943"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
11944"/// \\param __w11\n"
11945"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
11946"/// \\param __w10\n"
11947"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
11948"/// \\param __w09\n"
11949"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
11950"/// \\param __w08\n"
11951"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
11952"/// \\param __w07\n"
11953"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
11954"/// \\param __w06\n"
11955"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
11956"/// \\param __w05\n"
11957"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
11958"/// \\param __w04\n"
11959"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
11960"/// \\param __w03\n"
11961"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
11962"/// \\param __w02\n"
11963"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
11964"/// \\param __w01\n"
11965"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
11966"/// \\param __w00\n"
11967"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
11968"/// \\returns An initialized 256-bit integer vector.\n"
11969"static __inline __m256i __DEFAULT_FN_ATTRS\n"
11970"_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,\n"
11971" short __w11, short __w10, short __w09, short __w08,\n"
11972" short __w07, short __w06, short __w05, short __w04,\n"
11973" short __w03, short __w02, short __w01, short __w00)\n"
11974"{\n"
11975" return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,\n"
11976" __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };\n"
11977"}\n"
11978"\n"
11979"/// Constructs a 256-bit integer vector initialized with the specified\n"
11980"/// 8-bit integral values.\n"
11981"///\n"
11982"/// \\headerfile <x86intrin.h>\n"
11983"///\n"
11984"/// This intrinsic is a utility function and does not correspond to a specific\n"
11985"/// instruction.\n"
11986"///\n"
11987"/// \\param __b31\n"
11988"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
11989"/// \\param __b30\n"
11990"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
11991"/// \\param __b29\n"
11992"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
11993"/// \\param __b28\n"
11994"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
11995"/// \\param __b27\n"
11996"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
11997"/// \\param __b26\n"
11998"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
11999"/// \\param __b25\n"
12000"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12001"/// \\param __b24\n"
12002"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12003"/// \\param __b23\n"
12004"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12005"/// \\param __b22\n"
12006"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12007"/// \\param __b21\n"
12008"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12009"/// \\param __b20\n"
12010"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12011"/// \\param __b19\n"
12012"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12013"/// \\param __b18\n"
12014"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12015"/// \\param __b17\n"
12016"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12017"/// \\param __b16\n"
12018"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12019"/// \\param __b15\n"
12020"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12021"/// \\param __b14\n"
12022"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12023"/// \\param __b13\n"
12024"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12025"/// \\param __b12\n"
12026"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12027"/// \\param __b11\n"
12028"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12029"/// \\param __b10\n"
12030"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12031"/// \\param __b09\n"
12032"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12033"/// \\param __b08\n"
12034"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12035"/// \\param __b07\n"
12036"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12037"/// \\param __b06\n"
12038"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12039"/// \\param __b05\n"
12040"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12041"/// \\param __b04\n"
12042"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12043"/// \\param __b03\n"
12044"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12045"/// \\param __b02\n"
12046"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12047"/// \\param __b01\n"
12048"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12049"/// \\param __b00\n"
12050"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12051"/// \\returns An initialized 256-bit integer vector.\n"
12052"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12053"_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12054" char __b27, char __b26, char __b25, char __b24,\n"
12055" char __b23, char __b22, char __b21, char __b20,\n"
12056" char __b19, char __b18, char __b17, char __b16,\n"
12057" char __b15, char __b14, char __b13, char __b12,\n"
12058" char __b11, char __b10, char __b09, char __b08,\n"
12059" char __b07, char __b06, char __b05, char __b04,\n"
12060" char __b03, char __b02, char __b01, char __b00)\n"
12061"{\n"
12062" return __extension__ (__m256i)(__v32qi){\n"
12063" __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12064" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12065" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12066" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31\n"
12067" };\n"
12068"}\n"
12069"\n"
12070"/// Constructs a 256-bit integer vector initialized with the specified\n"
12071"/// 64-bit integral values.\n"
12072"///\n"
12073"/// \\headerfile <x86intrin.h>\n"
12074"///\n"
12075"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12076"/// instruction.\n"
12077"///\n"
12078"/// \\param __a\n"
12079"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12080"/// \\param __b\n"
12081"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12082"/// \\param __c\n"
12083"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12084"/// \\param __d\n"
12085"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12086"/// \\returns An initialized 256-bit integer vector.\n"
12087"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12088"_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12089"{\n"
12090" return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };\n"
12091"}\n"
12092"\n"
12093"/* Create vectors with elements in reverse order */\n"
12094"/// Constructs a 256-bit floating-point vector of [4 x double],\n"
12095"/// initialized in reverse order with the specified double-precision\n"
12096"/// floating-point values.\n"
12097"///\n"
12098"/// \\headerfile <x86intrin.h>\n"
12099"///\n"
12100"/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n"
12101"/// instruction.\n"
12102"///\n"
12103"/// \\param __a\n"
12104"/// A double-precision floating-point value used to initialize bits [63:0]\n"
12105"/// of the result.\n"
12106"/// \\param __b\n"
12107"/// A double-precision floating-point value used to initialize bits [127:64]\n"
12108"/// of the result.\n"
12109"/// \\param __c\n"
12110"/// A double-precision floating-point value used to initialize bits [191:128]\n"
12111"/// of the result.\n"
12112"/// \\param __d\n"
12113"/// A double-precision floating-point value used to initialize bits [255:192]\n"
12114"/// of the result.\n"
12115"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12116"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12117"_mm256_setr_pd(double __a, double __b, double __c, double __d)\n"
12118"{\n"
12119" return _mm256_set_pd(__d, __c, __b, __a);\n"
12120"}\n"
12121"\n"
12122"/// Constructs a 256-bit floating-point vector of [8 x float],\n"
12123"/// initialized in reverse order with the specified single-precision\n"
12124"/// float-point values.\n"
12125"///\n"
12126"/// \\headerfile <x86intrin.h>\n"
12127"///\n"
12128"/// This intrinsic is a utility function and does not correspond to a specific\n"
12129"/// instruction.\n"
12130"///\n"
12131"/// \\param __a\n"
12132"/// A single-precision floating-point value used to initialize bits [31:0]\n"
12133"/// of the result.\n"
12134"/// \\param __b\n"
12135"/// A single-precision floating-point value used to initialize bits [63:32]\n"
12136"/// of the result.\n"
12137"/// \\param __c\n"
12138"/// A single-precision floating-point value used to initialize bits [95:64]\n"
12139"/// of the result.\n"
12140"/// \\param __d\n"
12141"/// A single-precision floating-point value used to initialize bits [127:96]\n"
12142"/// of the result.\n"
12143"/// \\param __e\n"
12144"/// A single-precision floating-point value used to initialize bits [159:128]\n"
12145"/// of the result.\n"
12146"/// \\param __f\n"
12147"/// A single-precision floating-point value used to initialize bits [191:160]\n"
12148"/// of the result.\n"
12149"/// \\param __g\n"
12150"/// A single-precision floating-point value used to initialize bits [223:192]\n"
12151"/// of the result.\n"
12152"/// \\param __h\n"
12153"/// A single-precision floating-point value used to initialize bits [255:224]\n"
12154"/// of the result.\n"
12155"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12156"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12157"_mm256_setr_ps(float __a, float __b, float __c, float __d,\n"
12158" float __e, float __f, float __g, float __h)\n"
12159"{\n"
12160" return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);\n"
12161"}\n"
12162"\n"
12163"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12164"/// with the specified 32-bit integral values.\n"
12165"///\n"
12166"/// \\headerfile <x86intrin.h>\n"
12167"///\n"
12168"/// This intrinsic is a utility function and does not correspond to a specific\n"
12169"/// instruction.\n"
12170"///\n"
12171"/// \\param __i0\n"
12172"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
12173"/// \\param __i1\n"
12174"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
12175"/// \\param __i2\n"
12176"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
12177"/// \\param __i3\n"
12178"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
12179"/// \\param __i4\n"
12180"/// A 32-bit integral value used to initialize bits [159:128] of the result.\n"
12181"/// \\param __i5\n"
12182"/// A 32-bit integral value used to initialize bits [191:160] of the result.\n"
12183"/// \\param __i6\n"
12184"/// A 32-bit integral value used to initialize bits [223:192] of the result.\n"
12185"/// \\param __i7\n"
12186"/// A 32-bit integral value used to initialize bits [255:224] of the result.\n"
12187"/// \\returns An initialized 256-bit integer vector.\n"
12188"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12189"_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,\n"
12190" int __i4, int __i5, int __i6, int __i7)\n"
12191"{\n"
12192" return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);\n"
12193"}\n"
12194"\n"
12195"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12196"/// with the specified 16-bit integral values.\n"
12197"///\n"
12198"/// \\headerfile <x86intrin.h>\n"
12199"///\n"
12200"/// This intrinsic is a utility function and does not correspond to a specific\n"
12201"/// instruction.\n"
12202"///\n"
12203"/// \\param __w15\n"
12204"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
12205"/// \\param __w14\n"
12206"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
12207"/// \\param __w13\n"
12208"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
12209"/// \\param __w12\n"
12210"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
12211"/// \\param __w11\n"
12212"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
12213"/// \\param __w10\n"
12214"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
12215"/// \\param __w09\n"
12216"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
12217"/// \\param __w08\n"
12218"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
12219"/// \\param __w07\n"
12220"/// A 16-bit integral value used to initialize bits [143:128] of the result.\n"
12221"/// \\param __w06\n"
12222"/// A 16-bit integral value used to initialize bits [159:144] of the result.\n"
12223"/// \\param __w05\n"
12224"/// A 16-bit integral value used to initialize bits [175:160] of the result.\n"
12225"/// \\param __w04\n"
12226"/// A 16-bit integral value used to initialize bits [191:176] of the result.\n"
12227"/// \\param __w03\n"
12228"/// A 16-bit integral value used to initialize bits [207:192] of the result.\n"
12229"/// \\param __w02\n"
12230"/// A 16-bit integral value used to initialize bits [223:208] of the result.\n"
12231"/// \\param __w01\n"
12232"/// A 16-bit integral value used to initialize bits [239:224] of the result.\n"
12233"/// \\param __w00\n"
12234"/// A 16-bit integral value used to initialize bits [255:240] of the result.\n"
12235"/// \\returns An initialized 256-bit integer vector.\n"
12236"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12237"_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,\n"
12238" short __w11, short __w10, short __w09, short __w08,\n"
12239" short __w07, short __w06, short __w05, short __w04,\n"
12240" short __w03, short __w02, short __w01, short __w00)\n"
12241"{\n"
12242" return _mm256_set_epi16(__w00, __w01, __w02, __w03,\n"
12243" __w04, __w05, __w06, __w07,\n"
12244" __w08, __w09, __w10, __w11,\n"
12245" __w12, __w13, __w14, __w15);\n"
12246"}\n"
12247"\n"
12248"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12249"/// with the specified 8-bit integral values.\n"
12250"///\n"
12251"/// \\headerfile <x86intrin.h>\n"
12252"///\n"
12253"/// This intrinsic is a utility function and does not correspond to a specific\n"
12254"/// instruction.\n"
12255"///\n"
12256"/// \\param __b31\n"
12257"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
12258"/// \\param __b30\n"
12259"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
12260"/// \\param __b29\n"
12261"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
12262"/// \\param __b28\n"
12263"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
12264"/// \\param __b27\n"
12265"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
12266"/// \\param __b26\n"
12267"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
12268"/// \\param __b25\n"
12269"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
12270"/// \\param __b24\n"
12271"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
12272"/// \\param __b23\n"
12273"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
12274"/// \\param __b22\n"
12275"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
12276"/// \\param __b21\n"
12277"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
12278"/// \\param __b20\n"
12279"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
12280"/// \\param __b19\n"
12281"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
12282"/// \\param __b18\n"
12283"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
12284"/// \\param __b17\n"
12285"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
12286"/// \\param __b16\n"
12287"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
12288"/// \\param __b15\n"
12289"/// An 8-bit integral value used to initialize bits [135:128] of the result.\n"
12290"/// \\param __b14\n"
12291"/// An 8-bit integral value used to initialize bits [143:136] of the result.\n"
12292"/// \\param __b13\n"
12293"/// An 8-bit integral value used to initialize bits [151:144] of the result.\n"
12294"/// \\param __b12\n"
12295"/// An 8-bit integral value used to initialize bits [159:152] of the result.\n"
12296"/// \\param __b11\n"
12297"/// An 8-bit integral value used to initialize bits [167:160] of the result.\n"
12298"/// \\param __b10\n"
12299"/// An 8-bit integral value used to initialize bits [175:168] of the result.\n"
12300"/// \\param __b09\n"
12301"/// An 8-bit integral value used to initialize bits [183:176] of the result.\n"
12302"/// \\param __b08\n"
12303"/// An 8-bit integral value used to initialize bits [191:184] of the result.\n"
12304"/// \\param __b07\n"
12305"/// An 8-bit integral value used to initialize bits [199:192] of the result.\n"
12306"/// \\param __b06\n"
12307"/// An 8-bit integral value used to initialize bits [207:200] of the result.\n"
12308"/// \\param __b05\n"
12309"/// An 8-bit integral value used to initialize bits [215:208] of the result.\n"
12310"/// \\param __b04\n"
12311"/// An 8-bit integral value used to initialize bits [223:216] of the result.\n"
12312"/// \\param __b03\n"
12313"/// An 8-bit integral value used to initialize bits [231:224] of the result.\n"
12314"/// \\param __b02\n"
12315"/// An 8-bit integral value used to initialize bits [239:232] of the result.\n"
12316"/// \\param __b01\n"
12317"/// An 8-bit integral value used to initialize bits [247:240] of the result.\n"
12318"/// \\param __b00\n"
12319"/// An 8-bit integral value used to initialize bits [255:248] of the result.\n"
12320"/// \\returns An initialized 256-bit integer vector.\n"
12321"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12322"_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,\n"
12323" char __b27, char __b26, char __b25, char __b24,\n"
12324" char __b23, char __b22, char __b21, char __b20,\n"
12325" char __b19, char __b18, char __b17, char __b16,\n"
12326" char __b15, char __b14, char __b13, char __b12,\n"
12327" char __b11, char __b10, char __b09, char __b08,\n"
12328" char __b07, char __b06, char __b05, char __b04,\n"
12329" char __b03, char __b02, char __b01, char __b00)\n"
12330"{\n"
12331" return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n"
12332" __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n"
12333" __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n"
12334" __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);\n"
12335"}\n"
12336"\n"
12337"/// Constructs a 256-bit integer vector, initialized in reverse order\n"
12338"/// with the specified 64-bit integral values.\n"
12339"///\n"
12340"/// \\headerfile <x86intrin.h>\n"
12341"///\n"
12342"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n"
12343"/// instruction.\n"
12344"///\n"
12345"/// \\param __a\n"
12346"/// A 64-bit integral value used to initialize bits [63:0] of the result.\n"
12347"/// \\param __b\n"
12348"/// A 64-bit integral value used to initialize bits [127:64] of the result.\n"
12349"/// \\param __c\n"
12350"/// A 64-bit integral value used to initialize bits [191:128] of the result.\n"
12351"/// \\param __d\n"
12352"/// A 64-bit integral value used to initialize bits [255:192] of the result.\n"
12353"/// \\returns An initialized 256-bit integer vector.\n"
12354"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12355"_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)\n"
12356"{\n"
12357" return _mm256_set_epi64x(__d, __c, __b, __a);\n"
12358"}\n"
12359"\n"
12360"/* Create vectors with repeated elements */\n"
12361"/// Constructs a 256-bit floating-point vector of [4 x double], with each\n"
12362"/// of the four double-precision floating-point vector elements set to the\n"
12363"/// specified double-precision floating-point value.\n"
12364"///\n"
12365"/// \\headerfile <x86intrin.h>\n"
12366"///\n"
12367"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12368"///\n"
12369"/// \\param __w\n"
12370"/// A double-precision floating-point value used to initialize each vector\n"
12371"/// element of the result.\n"
12372"/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n"
12373"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12374"_mm256_set1_pd(double __w)\n"
12375"{\n"
12376" return _mm256_set_pd(__w, __w, __w, __w);\n"
12377"}\n"
12378"\n"
12379"/// Constructs a 256-bit floating-point vector of [8 x float], with each\n"
12380"/// of the eight single-precision floating-point vector elements set to the\n"
12381"/// specified single-precision floating-point value.\n"
12382"///\n"
12383"/// \\headerfile <x86intrin.h>\n"
12384"///\n"
12385"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12386"/// instruction.\n"
12387"///\n"
12388"/// \\param __w\n"
12389"/// A single-precision floating-point value used to initialize each vector\n"
12390"/// element of the result.\n"
12391"/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n"
12392"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12393"_mm256_set1_ps(float __w)\n"
12394"{\n"
12395" return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);\n"
12396"}\n"
12397"\n"
12398"/// Constructs a 256-bit integer vector of [8 x i32], with each of the\n"
12399"/// 32-bit integral vector elements set to the specified 32-bit integral\n"
12400"/// value.\n"
12401"///\n"
12402"/// \\headerfile <x86intrin.h>\n"
12403"///\n"
12404"/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n"
12405"/// instruction.\n"
12406"///\n"
12407"/// \\param __i\n"
12408"/// A 32-bit integral value used to initialize each vector element of the\n"
12409"/// result.\n"
12410"/// \\returns An initialized 256-bit integer vector of [8 x i32].\n"
12411"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12412"_mm256_set1_epi32(int __i)\n"
12413"{\n"
12414" return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);\n"
12415"}\n"
12416"\n"
12417"/// Constructs a 256-bit integer vector of [16 x i16], with each of the\n"
12418"/// 16-bit integral vector elements set to the specified 16-bit integral\n"
12419"/// value.\n"
12420"///\n"
12421"/// \\headerfile <x86intrin.h>\n"
12422"///\n"
12423"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12424"///\n"
12425"/// \\param __w\n"
12426"/// A 16-bit integral value used to initialize each vector element of the\n"
12427"/// result.\n"
12428"/// \\returns An initialized 256-bit integer vector of [16 x i16].\n"
12429"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12430"_mm256_set1_epi16(short __w)\n"
12431"{\n"
12432" return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,\n"
12433" __w, __w, __w, __w, __w, __w, __w, __w);\n"
12434"}\n"
12435"\n"
12436"/// Constructs a 256-bit integer vector of [32 x i8], with each of the\n"
12437"/// 8-bit integral vector elements set to the specified 8-bit integral value.\n"
12438"///\n"
12439"/// \\headerfile <x86intrin.h>\n"
12440"///\n"
12441"/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n"
12442"///\n"
12443"/// \\param __b\n"
12444"/// An 8-bit integral value used to initialize each vector element of the\n"
12445"/// result.\n"
12446"/// \\returns An initialized 256-bit integer vector of [32 x i8].\n"
12447"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12448"_mm256_set1_epi8(char __b)\n"
12449"{\n"
12450" return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,\n"
12451" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12452" __b, __b, __b, __b, __b, __b, __b, __b,\n"
12453" __b, __b, __b, __b, __b, __b, __b, __b);\n"
12454"}\n"
12455"\n"
12456"/// Constructs a 256-bit integer vector of [4 x i64], with each of the\n"
12457"/// 64-bit integral vector elements set to the specified 64-bit integral\n"
12458"/// value.\n"
12459"///\n"
12460"/// \\headerfile <x86intrin.h>\n"
12461"///\n"
12462"/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n"
12463"///\n"
12464"/// \\param __q\n"
12465"/// A 64-bit integral value used to initialize each vector element of the\n"
12466"/// result.\n"
12467"/// \\returns An initialized 256-bit integer vector of [4 x i64].\n"
12468"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12469"_mm256_set1_epi64x(long long __q)\n"
12470"{\n"
12471" return _mm256_set_epi64x(__q, __q, __q, __q);\n"
12472"}\n"
12473"\n"
12474"/* Create __zeroed vectors */\n"
12475"/// Constructs a 256-bit floating-point vector of [4 x double] with all\n"
12476"/// vector elements initialized to zero.\n"
12477"///\n"
12478"/// \\headerfile <x86intrin.h>\n"
12479"///\n"
12480"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12481"///\n"
12482"/// \\returns A 256-bit vector of [4 x double] with all elements set to zero.\n"
12483"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12484"_mm256_setzero_pd(void)\n"
12485"{\n"
12486" return __extension__ (__m256d){ 0, 0, 0, 0 };\n"
12487"}\n"
12488"\n"
12489"/// Constructs a 256-bit floating-point vector of [8 x float] with all\n"
12490"/// vector elements initialized to zero.\n"
12491"///\n"
12492"/// \\headerfile <x86intrin.h>\n"
12493"///\n"
12494"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12495"///\n"
12496"/// \\returns A 256-bit vector of [8 x float] with all elements set to zero.\n"
12497"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12498"_mm256_setzero_ps(void)\n"
12499"{\n"
12500" return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };\n"
12501"}\n"
12502"\n"
12503"/// Constructs a 256-bit integer vector initialized to zero.\n"
12504"///\n"
12505"/// \\headerfile <x86intrin.h>\n"
12506"///\n"
12507"/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n"
12508"///\n"
12509"/// \\returns A 256-bit integer vector initialized to zero.\n"
12510"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12511"_mm256_setzero_si256(void)\n"
12512"{\n"
12513" return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };\n"
12514"}\n"
12515"\n"
12516"/* Cast between vector types */\n"
12517"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12518"/// floating-point vector of [8 x float].\n"
12519"///\n"
12520"/// \\headerfile <x86intrin.h>\n"
12521"///\n"
12522"/// This intrinsic has no corresponding instruction.\n"
12523"///\n"
12524"/// \\param __a\n"
12525"/// A 256-bit floating-point vector of [4 x double].\n"
12526"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12527"/// bitwise pattern as the parameter.\n"
12528"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12529"_mm256_castpd_ps(__m256d __a)\n"
12530"{\n"
12531" return (__m256)__a;\n"
12532"}\n"
12533"\n"
12534"/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n"
12535"/// integer vector.\n"
12536"///\n"
12537"/// \\headerfile <x86intrin.h>\n"
12538"///\n"
12539"/// This intrinsic has no corresponding instruction.\n"
12540"///\n"
12541"/// \\param __a\n"
12542"/// A 256-bit floating-point vector of [4 x double].\n"
12543"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12544"/// parameter.\n"
12545"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12546"_mm256_castpd_si256(__m256d __a)\n"
12547"{\n"
12548" return (__m256i)__a;\n"
12549"}\n"
12550"\n"
12551"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12552"/// floating-point vector of [4 x double].\n"
12553"///\n"
12554"/// \\headerfile <x86intrin.h>\n"
12555"///\n"
12556"/// This intrinsic has no corresponding instruction.\n"
12557"///\n"
12558"/// \\param __a\n"
12559"/// A 256-bit floating-point vector of [8 x float].\n"
12560"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12561"/// bitwise pattern as the parameter.\n"
12562"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12563"_mm256_castps_pd(__m256 __a)\n"
12564"{\n"
12565" return (__m256d)__a;\n"
12566"}\n"
12567"\n"
12568"/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n"
12569"/// integer vector.\n"
12570"///\n"
12571"/// \\headerfile <x86intrin.h>\n"
12572"///\n"
12573"/// This intrinsic has no corresponding instruction.\n"
12574"///\n"
12575"/// \\param __a\n"
12576"/// A 256-bit floating-point vector of [8 x float].\n"
12577"/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n"
12578"/// parameter.\n"
12579"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12580"_mm256_castps_si256(__m256 __a)\n"
12581"{\n"
12582" return (__m256i)__a;\n"
12583"}\n"
12584"\n"
12585"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12586"/// of [8 x float].\n"
12587"///\n"
12588"/// \\headerfile <x86intrin.h>\n"
12589"///\n"
12590"/// This intrinsic has no corresponding instruction.\n"
12591"///\n"
12592"/// \\param __a\n"
12593"/// A 256-bit integer vector.\n"
12594"/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n"
12595"/// bitwise pattern as the parameter.\n"
12596"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12597"_mm256_castsi256_ps(__m256i __a)\n"
12598"{\n"
12599" return (__m256)__a;\n"
12600"}\n"
12601"\n"
12602"/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n"
12603"/// of [4 x double].\n"
12604"///\n"
12605"/// \\headerfile <x86intrin.h>\n"
12606"///\n"
12607"/// This intrinsic has no corresponding instruction.\n"
12608"///\n"
12609"/// \\param __a\n"
12610"/// A 256-bit integer vector.\n"
12611"/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n"
12612"/// bitwise pattern as the parameter.\n"
12613"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12614"_mm256_castsi256_pd(__m256i __a)\n"
12615"{\n"
12616" return (__m256d)__a;\n"
12617"}\n"
12618"\n"
12619"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12620"/// [4 x double] as a 128-bit floating-point vector of [2 x double].\n"
12621"///\n"
12622"/// \\headerfile <x86intrin.h>\n"
12623"///\n"
12624"/// This intrinsic has no corresponding instruction.\n"
12625"///\n"
12626"/// \\param __a\n"
12627"/// A 256-bit floating-point vector of [4 x double].\n"
12628"/// \\returns A 128-bit floating-point vector of [2 x double] containing the\n"
12629"/// lower 128 bits of the parameter.\n"
12630"static __inline __m128d __DEFAULT_FN_ATTRS\n"
12631"_mm256_castpd256_pd128(__m256d __a)\n"
12632"{\n"
12633" return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);\n"
12634"}\n"
12635"\n"
12636"/// Returns the lower 128 bits of a 256-bit floating-point vector of\n"
12637"/// [8 x float] as a 128-bit floating-point vector of [4 x float].\n"
12638"///\n"
12639"/// \\headerfile <x86intrin.h>\n"
12640"///\n"
12641"/// This intrinsic has no corresponding instruction.\n"
12642"///\n"
12643"/// \\param __a\n"
12644"/// A 256-bit floating-point vector of [8 x float].\n"
12645"/// \\returns A 128-bit floating-point vector of [4 x float] containing the\n"
12646"/// lower 128 bits of the parameter.\n"
12647"static __inline __m128 __DEFAULT_FN_ATTRS\n"
12648"_mm256_castps256_ps128(__m256 __a)\n"
12649"{\n"
12650" return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);\n"
12651"}\n"
12652"\n"
12653"/// Truncates a 256-bit integer vector into a 128-bit integer vector.\n"
12654"///\n"
12655"/// \\headerfile <x86intrin.h>\n"
12656"///\n"
12657"/// This intrinsic has no corresponding instruction.\n"
12658"///\n"
12659"/// \\param __a\n"
12660"/// A 256-bit integer vector.\n"
12661"/// \\returns A 128-bit integer vector containing the lower 128 bits of the\n"
12662"/// parameter.\n"
12663"static __inline __m128i __DEFAULT_FN_ATTRS\n"
12664"_mm256_castsi256_si128(__m256i __a)\n"
12665"{\n"
12666" return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);\n"
12667"}\n"
12668"\n"
12669"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12670"/// 128-bit floating-point vector of [2 x double].\n"
12671"///\n"
12672"/// The lower 128 bits contain the value of the source vector. The contents\n"
12673"/// of the upper 128 bits are undefined.\n"
12674"///\n"
12675"/// \\headerfile <x86intrin.h>\n"
12676"///\n"
12677"/// This intrinsic has no corresponding instruction.\n"
12678"///\n"
12679"/// \\param __a\n"
12680"/// A 128-bit vector of [2 x double].\n"
12681"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12682"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12683"/// are undefined.\n"
12684"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12685"_mm256_castpd128_pd256(__m128d __a)\n"
12686"{\n"
12687" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);\n"
12688"}\n"
12689"\n"
12690"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12691"/// 128-bit floating-point vector of [4 x float].\n"
12692"///\n"
12693"/// The lower 128 bits contain the value of the source vector. The contents\n"
12694"/// of the upper 128 bits are undefined.\n"
12695"///\n"
12696"/// \\headerfile <x86intrin.h>\n"
12697"///\n"
12698"/// This intrinsic has no corresponding instruction.\n"
12699"///\n"
12700"/// \\param __a\n"
12701"/// A 128-bit vector of [4 x float].\n"
12702"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12703"/// contain the value of the parameter. The contents of the upper 128 bits\n"
12704"/// are undefined.\n"
12705"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12706"_mm256_castps128_ps256(__m128 __a)\n"
12707"{\n"
12708" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);\n"
12709"}\n"
12710"\n"
12711"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12712"///\n"
12713"/// The lower 128 bits contain the value of the source vector. The contents\n"
12714"/// of the upper 128 bits are undefined.\n"
12715"///\n"
12716"/// \\headerfile <x86intrin.h>\n"
12717"///\n"
12718"/// This intrinsic has no corresponding instruction.\n"
12719"///\n"
12720"/// \\param __a\n"
12721"/// A 128-bit integer vector.\n"
12722"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12723"/// the parameter. The contents of the upper 128 bits are undefined.\n"
12724"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12725"_mm256_castsi128_si256(__m128i __a)\n"
12726"{\n"
12727" return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);\n"
12728"}\n"
12729"\n"
12730"/// Constructs a 256-bit floating-point vector of [4 x double] from a\n"
12731"/// 128-bit floating-point vector of [2 x double]. The lower 128 bits\n"
12732"/// contain the value of the source vector. The upper 128 bits are set\n"
12733"/// to zero.\n"
12734"///\n"
12735"/// \\headerfile <x86intrin.h>\n"
12736"///\n"
12737"/// This intrinsic has no corresponding instruction.\n"
12738"///\n"
12739"/// \\param __a\n"
12740"/// A 128-bit vector of [2 x double].\n"
12741"/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n"
12742"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12743"static __inline __m256d __DEFAULT_FN_ATTRS\n"
12744"_mm256_zextpd128_pd256(__m128d __a)\n"
12745"{\n"
12746" return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);\n"
12747"}\n"
12748"\n"
12749"/// Constructs a 256-bit floating-point vector of [8 x float] from a\n"
12750"/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain\n"
12751"/// the value of the source vector. The upper 128 bits are set to zero.\n"
12752"///\n"
12753"/// \\headerfile <x86intrin.h>\n"
12754"///\n"
12755"/// This intrinsic has no corresponding instruction.\n"
12756"///\n"
12757"/// \\param __a\n"
12758"/// A 128-bit vector of [4 x float].\n"
12759"/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n"
12760"/// contain the value of the parameter. The upper 128 bits are set to zero.\n"
12761"static __inline __m256 __DEFAULT_FN_ATTRS\n"
12762"_mm256_zextps128_ps256(__m128 __a)\n"
12763"{\n"
12764" return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);\n"
12765"}\n"
12766"\n"
12767"/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n"
12768"/// The lower 128 bits contain the value of the source vector. The upper\n"
12769"/// 128 bits are set to zero.\n"
12770"///\n"
12771"/// \\headerfile <x86intrin.h>\n"
12772"///\n"
12773"/// This intrinsic has no corresponding instruction.\n"
12774"///\n"
12775"/// \\param __a\n"
12776"/// A 128-bit integer vector.\n"
12777"/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n"
12778"/// the parameter. The upper 128 bits are set to zero.\n"
12779"static __inline __m256i __DEFAULT_FN_ATTRS\n"
12780"_mm256_zextsi128_si256(__m128i __a)\n"
12781"{\n"
12782" return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);\n"
12783"}\n"
12784"\n"
12785"/*\n"
12786" Vector insert.\n"
12787" We use macros rather than inlines because we only want to accept\n"
12788" invocations where the immediate M is a constant expression.\n"
12789"*/\n"
12790"/// Constructs a new 256-bit vector of [8 x float] by first duplicating\n"
12791"/// a 256-bit vector of [8 x float] given in the first parameter, and then\n"
12792"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12793"/// 128-bit vector of [4 x float] in the second parameter.\n"
12794"///\n"
12795"/// The immediate integer parameter determines between the upper or the lower\n"
12796"/// 128 bits.\n"
12797"///\n"
12798"/// \\headerfile <x86intrin.h>\n"
12799"///\n"
12800"/// \\code\n"
12801"/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);\n"
12802"/// \\endcode\n"
12803"///\n"
12804"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12805"///\n"
12806"/// \\param V1\n"
12807"/// A 256-bit vector of [8 x float]. This vector is copied to the result\n"
12808"/// first, and then either the upper or the lower 128 bits of the result will\n"
12809"/// be replaced by the contents of \\a V2.\n"
12810"/// \\param V2\n"
12811"/// A 128-bit vector of [4 x float]. The contents of this parameter are\n"
12812"/// written to either the upper or the lower 128 bits of the result depending\n"
12813"/// on the value of parameter \\a M.\n"
12814"/// \\param M\n"
12815"/// An immediate integer. The least significant bit determines how the values\n"
12816"/// from the two parameters are interleaved: \\n\n"
12817"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12818"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12819"/// result. \\n\n"
12820"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12821"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12822"/// result.\n"
12823"/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n"
12824"#define _mm256_insertf128_ps(V1, V2, M) \\\n"
12825" (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \\\n"
12826" (__v4sf)(__m128)(V2), (int)(M))\n"
12827"\n"
12828"/// Constructs a new 256-bit vector of [4 x double] by first duplicating\n"
12829"/// a 256-bit vector of [4 x double] given in the first parameter, and then\n"
12830"/// replacing either the upper or the lower 128 bits with the contents of a\n"
12831"/// 128-bit vector of [2 x double] in the second parameter.\n"
12832"///\n"
12833"/// The immediate integer parameter determines between the upper or the lower\n"
12834"/// 128 bits.\n"
12835"///\n"
12836"/// \\headerfile <x86intrin.h>\n"
12837"///\n"
12838"/// \\code\n"
12839"/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);\n"
12840"/// \\endcode\n"
12841"///\n"
12842"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12843"///\n"
12844"/// \\param V1\n"
12845"/// A 256-bit vector of [4 x double]. This vector is copied to the result\n"
12846"/// first, and then either the upper or the lower 128 bits of the result will\n"
12847"/// be replaced by the contents of \\a V2.\n"
12848"/// \\param V2\n"
12849"/// A 128-bit vector of [2 x double]. The contents of this parameter are\n"
12850"/// written to either the upper or the lower 128 bits of the result depending\n"
12851"/// on the value of parameter \\a M.\n"
12852"/// \\param M\n"
12853"/// An immediate integer. The least significant bit determines how the values\n"
12854"/// from the two parameters are interleaved: \\n\n"
12855"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12856"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12857"/// result. \\n\n"
12858"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12859"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12860"/// result.\n"
12861"/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n"
12862"#define _mm256_insertf128_pd(V1, V2, M) \\\n"
12863" (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \\\n"
12864" (__v2df)(__m128d)(V2), (int)(M))\n"
12865"\n"
12866"/// Constructs a new 256-bit integer vector by first duplicating a\n"
12867"/// 256-bit integer vector given in the first parameter, and then replacing\n"
12868"/// either the upper or the lower 128 bits with the contents of a 128-bit\n"
12869"/// integer vector in the second parameter.\n"
12870"///\n"
12871"/// The immediate integer parameter determines between the upper or the lower\n"
12872"/// 128 bits.\n"
12873"///\n"
12874"/// \\headerfile <x86intrin.h>\n"
12875"///\n"
12876"/// \\code\n"
12877"/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);\n"
12878"/// \\endcode\n"
12879"///\n"
12880"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
12881"///\n"
12882"/// \\param V1\n"
12883"/// A 256-bit integer vector. This vector is copied to the result first, and\n"
12884"/// then either the upper or the lower 128 bits of the result will be\n"
12885"/// replaced by the contents of \\a V2.\n"
12886"/// \\param V2\n"
12887"/// A 128-bit integer vector. The contents of this parameter are written to\n"
12888"/// either the upper or the lower 128 bits of the result depending on the\n"
12889"/// value of parameter \\a M.\n"
12890"/// \\param M\n"
12891"/// An immediate integer. The least significant bit determines how the values\n"
12892"/// from the two parameters are interleaved: \\n\n"
12893"/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n"
12894"/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n"
12895"/// result. \\n\n"
12896"/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n"
12897"/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n"
12898"/// result.\n"
12899"/// \\returns A 256-bit integer vector containing the interleaved values.\n"
12900"#define _mm256_insertf128_si256(V1, V2, M) \\\n"
12901" (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \\\n"
12902" (__v4si)(__m128i)(V2), (int)(M))\n"
12903"\n"
12904"/*\n"
12905" Vector extract.\n"
12906" We use macros rather than inlines because we only want to accept\n"
12907" invocations where the immediate M is a constant expression.\n"
12908"*/\n"
12909"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12910"/// of [8 x float], as determined by the immediate integer parameter, and\n"
12911"/// returns the extracted bits as a 128-bit vector of [4 x float].\n"
12912"///\n"
12913"/// \\headerfile <x86intrin.h>\n"
12914"///\n"
12915"/// \\code\n"
12916"/// __m128 _mm256_extractf128_ps(__m256 V, const int M);\n"
12917"/// \\endcode\n"
12918"///\n"
12919"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12920"///\n"
12921"/// \\param V\n"
12922"/// A 256-bit vector of [8 x float].\n"
12923"/// \\param M\n"
12924"/// An immediate integer. The least significant bit determines which bits are\n"
12925"/// extracted from the first parameter: \\n\n"
12926"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12927"/// result. \\n\n"
12928"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12929"/// \\returns A 128-bit vector of [4 x float] containing the extracted bits.\n"
12930"#define _mm256_extractf128_ps(V, M) \\\n"
12931" (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))\n"
12932"\n"
12933"/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n"
12934"/// of [4 x double], as determined by the immediate integer parameter, and\n"
12935"/// returns the extracted bits as a 128-bit vector of [2 x double].\n"
12936"///\n"
12937"/// \\headerfile <x86intrin.h>\n"
12938"///\n"
12939"/// \\code\n"
12940"/// __m128d _mm256_extractf128_pd(__m256d V, const int M);\n"
12941"/// \\endcode\n"
12942"///\n"
12943"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12944"///\n"
12945"/// \\param V\n"
12946"/// A 256-bit vector of [4 x double].\n"
12947"/// \\param M\n"
12948"/// An immediate integer. The least significant bit determines which bits are\n"
12949"/// extracted from the first parameter: \\n\n"
12950"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12951"/// result. \\n\n"
12952"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12953"/// \\returns A 128-bit vector of [2 x double] containing the extracted bits.\n"
12954"#define _mm256_extractf128_pd(V, M) \\\n"
12955" (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))\n"
12956"\n"
12957"/// Extracts either the upper or the lower 128 bits from a 256-bit\n"
12958"/// integer vector, as determined by the immediate integer parameter, and\n"
12959"/// returns the extracted bits as a 128-bit integer vector.\n"
12960"///\n"
12961"/// \\headerfile <x86intrin.h>\n"
12962"///\n"
12963"/// \\code\n"
12964"/// __m128i _mm256_extractf128_si256(__m256i V, const int M);\n"
12965"/// \\endcode\n"
12966"///\n"
12967"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n"
12968"///\n"
12969"/// \\param V\n"
12970"/// A 256-bit integer vector.\n"
12971"/// \\param M\n"
12972"/// An immediate integer. The least significant bit determines which bits are\n"
12973"/// extracted from the first parameter: \\n\n"
12974"/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n"
12975"/// result. \\n\n"
12976"/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n"
12977"/// \\returns A 128-bit integer vector containing the extracted bits.\n"
12978"#define _mm256_extractf128_si256(V, M) \\\n"
12979" (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))\n"
12980"\n"
12981"/* SIMD load ops (unaligned) */\n"
12982"/// Loads two 128-bit floating-point vectors of [4 x float] from\n"
12983"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
12984"/// of [8 x float] by concatenating the two 128-bit vectors.\n"
12985"///\n"
12986"/// \\headerfile <x86intrin.h>\n"
12987"///\n"
12988"/// This intrinsic corresponds to load instructions followed by the\n"
12989"/// <c> VINSERTF128 </c> instruction.\n"
12990"///\n"
12991"/// \\param __addr_hi\n"
12992"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
12993"/// single-precision floating-point values. These values are to be copied to\n"
12994"/// bits[255:128] of the result. The address of the memory location does not\n"
12995"/// have to be aligned.\n"
12996"/// \\param __addr_lo\n"
12997"/// A pointer to a 128-bit memory location containing 4 consecutive\n"
12998"/// single-precision floating-point values. These values are to be copied to\n"
12999"/// bits[127:0] of the result. The address of the memory location does not\n"
13000"/// have to be aligned.\n"
13001"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13002"/// concatenated result.\n"
13003"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13004"_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)\n"
13005"{\n"
13006" __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));\n"
13007" return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);\n"
13008"}\n"
13009"\n"
13010"/// Loads two 128-bit floating-point vectors of [2 x double] from\n"
13011"/// unaligned memory locations and constructs a 256-bit floating-point vector\n"
13012"/// of [4 x double] by concatenating the two 128-bit vectors.\n"
13013"///\n"
13014"/// \\headerfile <x86intrin.h>\n"
13015"///\n"
13016"/// This intrinsic corresponds to load instructions followed by the\n"
13017"/// <c> VINSERTF128 </c> instruction.\n"
13018"///\n"
13019"/// \\param __addr_hi\n"
13020"/// A pointer to a 128-bit memory location containing two consecutive\n"
13021"/// double-precision floating-point values. These values are to be copied to\n"
13022"/// bits[255:128] of the result. The address of the memory location does not\n"
13023"/// have to be aligned.\n"
13024"/// \\param __addr_lo\n"
13025"/// A pointer to a 128-bit memory location containing two consecutive\n"
13026"/// double-precision floating-point values. These values are to be copied to\n"
13027"/// bits[127:0] of the result. The address of the memory location does not\n"
13028"/// have to be aligned.\n"
13029"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13030"/// concatenated result.\n"
13031"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13032"_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)\n"
13033"{\n"
13034" __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));\n"
13035" return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);\n"
13036"}\n"
13037"\n"
13038"/// Loads two 128-bit integer vectors from unaligned memory locations and\n"
13039"/// constructs a 256-bit integer vector by concatenating the two 128-bit\n"
13040"/// vectors.\n"
13041"///\n"
13042"/// \\headerfile <x86intrin.h>\n"
13043"///\n"
13044"/// This intrinsic corresponds to load instructions followed by the\n"
13045"/// <c> VINSERTF128 </c> instruction.\n"
13046"///\n"
13047"/// \\param __addr_hi\n"
13048"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13049"/// vector. This vector is to be copied to bits[255:128] of the result. The\n"
13050"/// address of the memory location does not have to be aligned.\n"
13051"/// \\param __addr_lo\n"
13052"/// A pointer to a 128-bit memory location containing a 128-bit integer\n"
13053"/// vector. This vector is to be copied to bits[127:0] of the result. The\n"
13054"/// address of the memory location does not have to be aligned.\n"
13055"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13056"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13057"_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)\n"
13058"{\n"
13059" __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));\n"
13060" return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);\n"
13061"}\n"
13062"\n"
13063"/* SIMD store ops (unaligned) */\n"
13064"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13065"/// vector of [8 x float] into two different unaligned memory locations.\n"
13066"///\n"
13067"/// \\headerfile <x86intrin.h>\n"
13068"///\n"
13069"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13070"/// store instructions.\n"
13071"///\n"
13072"/// \\param __addr_hi\n"
13073"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13074"/// copied to this memory location. The address of this memory location does\n"
13075"/// not have to be aligned.\n"
13076"/// \\param __addr_lo\n"
13077"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13078"/// copied to this memory location. The address of this memory location does\n"
13079"/// not have to be aligned.\n"
13080"/// \\param __a\n"
13081"/// A 256-bit floating-point vector of [8 x float].\n"
13082"static __inline void __DEFAULT_FN_ATTRS\n"
13083"_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)\n"
13084"{\n"
13085" __m128 __v128;\n"
13086"\n"
13087" __v128 = _mm256_castps256_ps128(__a);\n"
13088" _mm_storeu_ps(__addr_lo, __v128);\n"
13089" __v128 = _mm256_extractf128_ps(__a, 1);\n"
13090" _mm_storeu_ps(__addr_hi, __v128);\n"
13091"}\n"
13092"\n"
13093"/// Stores the upper and lower 128 bits of a 256-bit floating-point\n"
13094"/// vector of [4 x double] into two different unaligned memory locations.\n"
13095"///\n"
13096"/// \\headerfile <x86intrin.h>\n"
13097"///\n"
13098"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13099"/// store instructions.\n"
13100"///\n"
13101"/// \\param __addr_hi\n"
13102"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13103"/// copied to this memory location. The address of this memory location does\n"
13104"/// not have to be aligned.\n"
13105"/// \\param __addr_lo\n"
13106"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13107"/// copied to this memory location. The address of this memory location does\n"
13108"/// not have to be aligned.\n"
13109"/// \\param __a\n"
13110"/// A 256-bit floating-point vector of [4 x double].\n"
13111"static __inline void __DEFAULT_FN_ATTRS\n"
13112"_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)\n"
13113"{\n"
13114" __m128d __v128;\n"
13115"\n"
13116" __v128 = _mm256_castpd256_pd128(__a);\n"
13117" _mm_storeu_pd(__addr_lo, __v128);\n"
13118" __v128 = _mm256_extractf128_pd(__a, 1);\n"
13119" _mm_storeu_pd(__addr_hi, __v128);\n"
13120"}\n"
13121"\n"
13122"/// Stores the upper and lower 128 bits of a 256-bit integer vector into\n"
13123"/// two different unaligned memory locations.\n"
13124"///\n"
13125"/// \\headerfile <x86intrin.h>\n"
13126"///\n"
13127"/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n"
13128"/// store instructions.\n"
13129"///\n"
13130"/// \\param __addr_hi\n"
13131"/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n"
13132"/// copied to this memory location. The address of this memory location does\n"
13133"/// not have to be aligned.\n"
13134"/// \\param __addr_lo\n"
13135"/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n"
13136"/// copied to this memory location. The address of this memory location does\n"
13137"/// not have to be aligned.\n"
13138"/// \\param __a\n"
13139"/// A 256-bit integer vector.\n"
13140"static __inline void __DEFAULT_FN_ATTRS\n"
13141"_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)\n"
13142"{\n"
13143" __m128i __v128;\n"
13144"\n"
13145" __v128 = _mm256_castsi256_si128(__a);\n"
13146" _mm_storeu_si128(__addr_lo, __v128);\n"
13147" __v128 = _mm256_extractf128_si256(__a, 1);\n"
13148" _mm_storeu_si128(__addr_hi, __v128);\n"
13149"}\n"
13150"\n"
13151"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13152"/// concatenating two 128-bit floating-point vectors of [4 x float].\n"
13153"///\n"
13154"/// \\headerfile <x86intrin.h>\n"
13155"///\n"
13156"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13157"///\n"
13158"/// \\param __hi\n"
13159"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13160"/// 128 bits of the result.\n"
13161"/// \\param __lo\n"
13162"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13163"/// 128 bits of the result.\n"
13164"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13165"/// concatenated result.\n"
13166"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13167"_mm256_set_m128 (__m128 __hi, __m128 __lo)\n"
13168"{\n"
13169" return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);\n"
13170"}\n"
13171"\n"
13172"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13173"/// concatenating two 128-bit floating-point vectors of [2 x double].\n"
13174"///\n"
13175"/// \\headerfile <x86intrin.h>\n"
13176"///\n"
13177"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13178"///\n"
13179"/// \\param __hi\n"
13180"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13181"/// 128 bits of the result.\n"
13182"/// \\param __lo\n"
13183"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13184"/// 128 bits of the result.\n"
13185"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13186"/// concatenated result.\n"
13187"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13188"_mm256_set_m128d (__m128d __hi, __m128d __lo)\n"
13189"{\n"
13190" return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);\n"
13191"}\n"
13192"\n"
13193"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13194"/// integer vectors.\n"
13195"///\n"
13196"/// \\headerfile <x86intrin.h>\n"
13197"///\n"
13198"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13199"///\n"
13200"/// \\param __hi\n"
13201"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13202"/// result.\n"
13203"/// \\param __lo\n"
13204"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13205"/// result.\n"
13206"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13207"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13208"_mm256_set_m128i (__m128i __hi, __m128i __lo)\n"
13209"{\n"
13210" return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);\n"
13211"}\n"
13212"\n"
13213"/// Constructs a 256-bit floating-point vector of [8 x float] by\n"
13214"/// concatenating two 128-bit floating-point vectors of [4 x float]. This is\n"
13215"/// similar to _mm256_set_m128, but the order of the input parameters is\n"
13216"/// swapped.\n"
13217"///\n"
13218"/// \\headerfile <x86intrin.h>\n"
13219"///\n"
13220"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13221"///\n"
13222"/// \\param __lo\n"
13223"/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n"
13224"/// 128 bits of the result.\n"
13225"/// \\param __hi\n"
13226"/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n"
13227"/// 128 bits of the result.\n"
13228"/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n"
13229"/// concatenated result.\n"
13230"static __inline __m256 __DEFAULT_FN_ATTRS\n"
13231"_mm256_setr_m128 (__m128 __lo, __m128 __hi)\n"
13232"{\n"
13233" return _mm256_set_m128(__hi, __lo);\n"
13234"}\n"
13235"\n"
13236"/// Constructs a 256-bit floating-point vector of [4 x double] by\n"
13237"/// concatenating two 128-bit floating-point vectors of [2 x double]. This is\n"
13238"/// similar to _mm256_set_m128d, but the order of the input parameters is\n"
13239"/// swapped.\n"
13240"///\n"
13241"/// \\headerfile <x86intrin.h>\n"
13242"///\n"
13243"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13244"///\n"
13245"/// \\param __lo\n"
13246"/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n"
13247"/// 128 bits of the result.\n"
13248"/// \\param __hi\n"
13249"/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n"
13250"/// 128 bits of the result.\n"
13251"/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n"
13252"/// concatenated result.\n"
13253"static __inline __m256d __DEFAULT_FN_ATTRS\n"
13254"_mm256_setr_m128d (__m128d __lo, __m128d __hi)\n"
13255"{\n"
13256" return (__m256d)_mm256_set_m128d(__hi, __lo);\n"
13257"}\n"
13258"\n"
13259"/// Constructs a 256-bit integer vector by concatenating two 128-bit\n"
13260"/// integer vectors. This is similar to _mm256_set_m128i, but the order of\n"
13261"/// the input parameters is swapped.\n"
13262"///\n"
13263"/// \\headerfile <x86intrin.h>\n"
13264"///\n"
13265"/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n"
13266"///\n"
13267"/// \\param __lo\n"
13268"/// A 128-bit integer vector to be copied to the lower 128 bits of the\n"
13269"/// result.\n"
13270"/// \\param __hi\n"
13271"/// A 128-bit integer vector to be copied to the upper 128 bits of the\n"
13272"/// result.\n"
13273"/// \\returns A 256-bit integer vector containing the concatenated result.\n"
13274"static __inline __m256i __DEFAULT_FN_ATTRS\n"
13275"_mm256_setr_m128i (__m128i __lo, __m128i __hi)\n"
13276"{\n"
13277" return (__m256i)_mm256_set_m128i(__hi, __lo);\n"
13278"}\n"
13279"\n"
13280"#undef __DEFAULT_FN_ATTRS\n"
13281"#undef __DEFAULT_FN_ATTRS128\n"
13282"\n"
13283"#endif /* __AVXINTRIN_H */\n"
13284"" } ,
13285 { "/builtins/bmi2intrin.h" , "/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===\n"
13286" *\n"
13287" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13288" * of this software and associated documentation files (the \"Software\"), to deal\n"
13289" * in the Software without restriction, including without limitation the rights\n"
13290" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13291" * copies of the Software, and to permit persons to whom the Software is\n"
13292" * furnished to do so, subject to the following conditions:\n"
13293" *\n"
13294" * The above copyright notice and this permission notice shall be included in\n"
13295" * all copies or substantial portions of the Software.\n"
13296" *\n"
13297" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13298" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13299" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13300" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13301" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13302" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13303" * THE SOFTWARE.\n"
13304" *\n"
13305" *===-----------------------------------------------------------------------===\n"
13306" */\n"
13307"\n"
13308"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13309"#error \"Never use <bmi2intrin.h> directly; include <x86intrin.h> instead.\"\n"
13310"#endif\n"
13311"\n"
13312"#ifndef __BMI2INTRIN_H\n"
13313"#define __BMI2INTRIN_H\n"
13314"\n"
13315"/* Define the default attributes for the functions in this file. */\n"
13316"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi2\")))\n"
13317"\n"
13318"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13319"_bzhi_u32(unsigned int __X, unsigned int __Y)\n"
13320"{\n"
13321" return __builtin_ia32_bzhi_si(__X, __Y);\n"
13322"}\n"
13323"\n"
13324"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13325"_pdep_u32(unsigned int __X, unsigned int __Y)\n"
13326"{\n"
13327" return __builtin_ia32_pdep_si(__X, __Y);\n"
13328"}\n"
13329"\n"
13330"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13331"_pext_u32(unsigned int __X, unsigned int __Y)\n"
13332"{\n"
13333" return __builtin_ia32_pext_si(__X, __Y);\n"
13334"}\n"
13335"\n"
13336"#ifdef __x86_64__\n"
13337"\n"
13338"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13339"_bzhi_u64(unsigned long long __X, unsigned long long __Y)\n"
13340"{\n"
13341" return __builtin_ia32_bzhi_di(__X, __Y);\n"
13342"}\n"
13343"\n"
13344"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13345"_pdep_u64(unsigned long long __X, unsigned long long __Y)\n"
13346"{\n"
13347" return __builtin_ia32_pdep_di(__X, __Y);\n"
13348"}\n"
13349"\n"
13350"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13351"_pext_u64(unsigned long long __X, unsigned long long __Y)\n"
13352"{\n"
13353" return __builtin_ia32_pext_di(__X, __Y);\n"
13354"}\n"
13355"\n"
13356"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13357"_mulx_u64 (unsigned long long __X, unsigned long long __Y,\n"
13358" unsigned long long *__P)\n"
13359"{\n"
13360" unsigned __int128 __res = (unsigned __int128) __X * __Y;\n"
13361" *__P = (unsigned long long) (__res >> 64);\n"
13362" return (unsigned long long) __res;\n"
13363"}\n"
13364"\n"
13365"#else /* !__x86_64__ */\n"
13366"\n"
13367"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13368"_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)\n"
13369"{\n"
13370" unsigned long long __res = (unsigned long long) __X * __Y;\n"
13371" *__P = (unsigned int) (__res >> 32);\n"
13372" return (unsigned int) __res;\n"
13373"}\n"
13374"\n"
13375"#endif /* !__x86_64__ */\n"
13376"\n"
13377"#undef __DEFAULT_FN_ATTRS\n"
13378"\n"
13379"#endif /* __BMI2INTRIN_H */\n"
13380"" } ,
13381 { "/builtins/bmiintrin.h" , "/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===\n"
13382" *\n"
13383" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13384" * of this software and associated documentation files (the \"Software\"), to deal\n"
13385" * in the Software without restriction, including without limitation the rights\n"
13386" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13387" * copies of the Software, and to permit persons to whom the Software is\n"
13388" * furnished to do so, subject to the following conditions:\n"
13389" *\n"
13390" * The above copyright notice and this permission notice shall be included in\n"
13391" * all copies or substantial portions of the Software.\n"
13392" *\n"
13393" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13394" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13395" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13396" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13397" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13398" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13399" * THE SOFTWARE.\n"
13400" *\n"
13401" *===-----------------------------------------------------------------------===\n"
13402" */\n"
13403"\n"
13404"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13405"#error \"Never use <bmiintrin.h> directly; include <x86intrin.h> instead.\"\n"
13406"#endif\n"
13407"\n"
13408"#ifndef __BMIINTRIN_H\n"
13409"#define __BMIINTRIN_H\n"
13410"\n"
13411"#define _tzcnt_u16(a) (__tzcnt_u16((a)))\n"
13412"\n"
13413"#define _andn_u32(a, b) (__andn_u32((a), (b)))\n"
13414"\n"
13415"/* _bextr_u32 != __bextr_u32 */\n"
13416"#define _blsi_u32(a) (__blsi_u32((a)))\n"
13417"\n"
13418"#define _blsmsk_u32(a) (__blsmsk_u32((a)))\n"
13419"\n"
13420"#define _blsr_u32(a) (__blsr_u32((a)))\n"
13421"\n"
13422"#define _tzcnt_u32(a) (__tzcnt_u32((a)))\n"
13423"\n"
13424"/* Define the default attributes for the functions in this file. */\n"
13425"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi\")))\n"
13426"\n"
13427"/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT\n"
13428" instruction behaves as BSF on non-BMI targets, there is code that expects\n"
13429" to use it as a potentially faster version of BSF. */\n"
13430"#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
13431"\n"
13432"/// Counts the number of trailing zero bits in the operand.\n"
13433"///\n"
13434"/// \\headerfile <x86intrin.h>\n"
13435"///\n"
13436"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13437"///\n"
13438"/// \\param __X\n"
13439"/// An unsigned 16-bit integer whose trailing zeros are to be counted.\n"
13440"/// \\returns An unsigned 16-bit integer containing the number of trailing zero\n"
13441"/// bits in the operand.\n"
13442"static __inline__ unsigned short __RELAXED_FN_ATTRS\n"
13443"__tzcnt_u16(unsigned short __X)\n"
13444"{\n"
13445" return __X ? __builtin_ctzs(__X) : 16;\n"
13446"}\n"
13447"\n"
13448"/// Performs a bitwise AND of the second operand with the one's\n"
13449"/// complement of the first operand.\n"
13450"///\n"
13451"/// \\headerfile <x86intrin.h>\n"
13452"///\n"
13453"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13454"///\n"
13455"/// \\param __X\n"
13456"/// An unsigned integer containing one of the operands.\n"
13457"/// \\param __Y\n"
13458"/// An unsigned integer containing one of the operands.\n"
13459"/// \\returns An unsigned integer containing the bitwise AND of the second\n"
13460"/// operand with the one's complement of the first operand.\n"
13461"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13462"__andn_u32(unsigned int __X, unsigned int __Y)\n"
13463"{\n"
13464" return ~__X & __Y;\n"
13465"}\n"
13466"\n"
13467"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13468"/// Extracts the specified bits from the first operand and returns them\n"
13469"/// in the least significant bits of the result.\n"
13470"///\n"
13471"/// \\headerfile <x86intrin.h>\n"
13472"///\n"
13473"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13474"///\n"
13475"/// \\param __X\n"
13476"/// An unsigned integer whose bits are to be extracted.\n"
13477"/// \\param __Y\n"
13478"/// An unsigned integer used to specify which bits are extracted. Bits [7:0]\n"
13479"/// specify the index of the least significant bit. Bits [15:8] specify the\n"
13480"/// number of bits to be extracted.\n"
13481"/// \\returns An unsigned integer whose least significant bits contain the\n"
13482"/// extracted bits.\n"
13483"/// \\see _bextr_u32\n"
13484"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13485"__bextr_u32(unsigned int __X, unsigned int __Y)\n"
13486"{\n"
13487" return __builtin_ia32_bextr_u32(__X, __Y);\n"
13488"}\n"
13489"\n"
13490"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13491"/// Extracts the specified bits from the first operand and returns them\n"
13492"/// in the least significant bits of the result.\n"
13493"///\n"
13494"/// \\headerfile <x86intrin.h>\n"
13495"///\n"
13496"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13497"///\n"
13498"/// \\param __X\n"
13499"/// An unsigned integer whose bits are to be extracted.\n"
13500"/// \\param __Y\n"
13501"/// An unsigned integer used to specify the index of the least significant\n"
13502"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13503"/// \\param __Z\n"
13504"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13505"/// Bits [7:0] specify the number of bits.\n"
13506"/// \\returns An unsigned integer whose least significant bits contain the\n"
13507"/// extracted bits.\n"
13508"/// \\see __bextr_u32\n"
13509"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13510"_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)\n"
13511"{\n"
13512" return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13513"}\n"
13514"\n"
13515"/// Clears all bits in the source except for the least significant bit\n"
13516"/// containing a value of 1 and returns the result.\n"
13517"///\n"
13518"/// \\headerfile <x86intrin.h>\n"
13519"///\n"
13520"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13521"///\n"
13522"/// \\param __X\n"
13523"/// An unsigned integer whose bits are to be cleared.\n"
13524"/// \\returns An unsigned integer containing the result of clearing the bits from\n"
13525"/// the source operand.\n"
13526"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13527"__blsi_u32(unsigned int __X)\n"
13528"{\n"
13529" return __X & -__X;\n"
13530"}\n"
13531"\n"
13532"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13533"/// including the least significant bit that is set to 1 in the source\n"
13534"/// operand and returns the result.\n"
13535"///\n"
13536"/// \\headerfile <x86intrin.h>\n"
13537"///\n"
13538"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13539"///\n"
13540"/// \\param __X\n"
13541"/// An unsigned integer used to create the mask.\n"
13542"/// \\returns An unsigned integer containing the newly created mask.\n"
13543"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13544"__blsmsk_u32(unsigned int __X)\n"
13545"{\n"
13546" return __X ^ (__X - 1);\n"
13547"}\n"
13548"\n"
13549"/// Clears the least significant bit that is set to 1 in the source\n"
13550"/// operand and returns the result.\n"
13551"///\n"
13552"/// \\headerfile <x86intrin.h>\n"
13553"///\n"
13554"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13555"///\n"
13556"/// \\param __X\n"
13557"/// An unsigned integer containing the operand to be cleared.\n"
13558"/// \\returns An unsigned integer containing the result of clearing the source\n"
13559"/// operand.\n"
13560"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
13561"__blsr_u32(unsigned int __X)\n"
13562"{\n"
13563" return __X & (__X - 1);\n"
13564"}\n"
13565"\n"
13566"/// Counts the number of trailing zero bits in the operand.\n"
13567"///\n"
13568"/// \\headerfile <x86intrin.h>\n"
13569"///\n"
13570"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13571"///\n"
13572"/// \\param __X\n"
13573"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13574"/// \\returns An unsigned 32-bit integer containing the number of trailing zero\n"
13575"/// bits in the operand.\n"
13576"static __inline__ unsigned int __RELAXED_FN_ATTRS\n"
13577"__tzcnt_u32(unsigned int __X)\n"
13578"{\n"
13579" return __X ? __builtin_ctz(__X) : 32;\n"
13580"}\n"
13581"\n"
13582"/// Counts the number of trailing zero bits in the operand.\n"
13583"///\n"
13584"/// \\headerfile <x86intrin.h>\n"
13585"///\n"
13586"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13587"///\n"
13588"/// \\param __X\n"
13589"/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n"
13590"/// \\returns An 32-bit integer containing the number of trailing zero bits in\n"
13591"/// the operand.\n"
13592"static __inline__ int __RELAXED_FN_ATTRS\n"
13593"_mm_tzcnt_32(unsigned int __X)\n"
13594"{\n"
13595" return __X ? __builtin_ctz(__X) : 32;\n"
13596"}\n"
13597"\n"
13598"#ifdef __x86_64__\n"
13599"\n"
13600"#define _andn_u64(a, b) (__andn_u64((a), (b)))\n"
13601"\n"
13602"/* _bextr_u64 != __bextr_u64 */\n"
13603"#define _blsi_u64(a) (__blsi_u64((a)))\n"
13604"\n"
13605"#define _blsmsk_u64(a) (__blsmsk_u64((a)))\n"
13606"\n"
13607"#define _blsr_u64(a) (__blsr_u64((a)))\n"
13608"\n"
13609"#define _tzcnt_u64(a) (__tzcnt_u64((a)))\n"
13610"\n"
13611"/// Performs a bitwise AND of the second operand with the one's\n"
13612"/// complement of the first operand.\n"
13613"///\n"
13614"/// \\headerfile <x86intrin.h>\n"
13615"///\n"
13616"/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n"
13617"///\n"
13618"/// \\param __X\n"
13619"/// An unsigned 64-bit integer containing one of the operands.\n"
13620"/// \\param __Y\n"
13621"/// An unsigned 64-bit integer containing one of the operands.\n"
13622"/// \\returns An unsigned 64-bit integer containing the bitwise AND of the second\n"
13623"/// operand with the one's complement of the first operand.\n"
13624"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13625"__andn_u64 (unsigned long long __X, unsigned long long __Y)\n"
13626"{\n"
13627" return ~__X & __Y;\n"
13628"}\n"
13629"\n"
13630"/* AMD-specified, double-leading-underscore version of BEXTR */\n"
13631"/// Extracts the specified bits from the first operand and returns them\n"
13632"/// in the least significant bits of the result.\n"
13633"///\n"
13634"/// \\headerfile <x86intrin.h>\n"
13635"///\n"
13636"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13637"///\n"
13638"/// \\param __X\n"
13639"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13640"/// \\param __Y\n"
13641"/// An unsigned 64-bit integer used to specify which bits are extracted. Bits\n"
13642"/// [7:0] specify the index of the least significant bit. Bits [15:8] specify\n"
13643"/// the number of bits to be extracted.\n"
13644"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13645"/// extracted bits.\n"
13646"/// \\see _bextr_u64\n"
13647"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13648"__bextr_u64(unsigned long long __X, unsigned long long __Y)\n"
13649"{\n"
13650" return __builtin_ia32_bextr_u64(__X, __Y);\n"
13651"}\n"
13652"\n"
13653"/* Intel-specified, single-leading-underscore version of BEXTR */\n"
13654"/// Extracts the specified bits from the first operand and returns them\n"
13655"/// in the least significant bits of the result.\n"
13656"///\n"
13657"/// \\headerfile <x86intrin.h>\n"
13658"///\n"
13659"/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n"
13660"///\n"
13661"/// \\param __X\n"
13662"/// An unsigned 64-bit integer whose bits are to be extracted.\n"
13663"/// \\param __Y\n"
13664"/// An unsigned integer used to specify the index of the least significant\n"
13665"/// bit for the bits to be extracted. Bits [7:0] specify the index.\n"
13666"/// \\param __Z\n"
13667"/// An unsigned integer used to specify the number of bits to be extracted.\n"
13668"/// Bits [7:0] specify the number of bits.\n"
13669"/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n"
13670"/// extracted bits.\n"
13671"/// \\see __bextr_u64\n"
13672"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13673"_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)\n"
13674"{\n"
13675" return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n"
13676"}\n"
13677"\n"
13678"/// Clears all bits in the source except for the least significant bit\n"
13679"/// containing a value of 1 and returns the result.\n"
13680"///\n"
13681"/// \\headerfile <x86intrin.h>\n"
13682"///\n"
13683"/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n"
13684"///\n"
13685"/// \\param __X\n"
13686"/// An unsigned 64-bit integer whose bits are to be cleared.\n"
13687"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13688"/// bits from the source operand.\n"
13689"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13690"__blsi_u64(unsigned long long __X)\n"
13691"{\n"
13692" return __X & -__X;\n"
13693"}\n"
13694"\n"
13695"/// Creates a mask whose bits are set to 1, using bit 0 up to and\n"
13696"/// including the least significant bit that is set to 1 in the source\n"
13697"/// operand and returns the result.\n"
13698"///\n"
13699"/// \\headerfile <x86intrin.h>\n"
13700"///\n"
13701"/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n"
13702"///\n"
13703"/// \\param __X\n"
13704"/// An unsigned 64-bit integer used to create the mask.\n"
13705"/// \\returns An unsigned 64-bit integer containing the newly created mask.\n"
13706"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13707"__blsmsk_u64(unsigned long long __X)\n"
13708"{\n"
13709" return __X ^ (__X - 1);\n"
13710"}\n"
13711"\n"
13712"/// Clears the least significant bit that is set to 1 in the source\n"
13713"/// operand and returns the result.\n"
13714"///\n"
13715"/// \\headerfile <x86intrin.h>\n"
13716"///\n"
13717"/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n"
13718"///\n"
13719"/// \\param __X\n"
13720"/// An unsigned 64-bit integer containing the operand to be cleared.\n"
13721"/// \\returns An unsigned 64-bit integer containing the result of clearing the\n"
13722"/// source operand.\n"
13723"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
13724"__blsr_u64(unsigned long long __X)\n"
13725"{\n"
13726" return __X & (__X - 1);\n"
13727"}\n"
13728"\n"
13729"/// Counts the number of trailing zero bits in the operand.\n"
13730"///\n"
13731"/// \\headerfile <x86intrin.h>\n"
13732"///\n"
13733"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13734"///\n"
13735"/// \\param __X\n"
13736"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13737"/// \\returns An unsigned 64-bit integer containing the number of trailing zero\n"
13738"/// bits in the operand.\n"
13739"static __inline__ unsigned long long __RELAXED_FN_ATTRS\n"
13740"__tzcnt_u64(unsigned long long __X)\n"
13741"{\n"
13742" return __X ? __builtin_ctzll(__X) : 64;\n"
13743"}\n"
13744"\n"
13745"/// Counts the number of trailing zero bits in the operand.\n"
13746"///\n"
13747"/// \\headerfile <x86intrin.h>\n"
13748"///\n"
13749"/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n"
13750"///\n"
13751"/// \\param __X\n"
13752"/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n"
13753"/// \\returns An 64-bit integer containing the number of trailing zero bits in\n"
13754"/// the operand.\n"
13755"static __inline__ long long __RELAXED_FN_ATTRS\n"
13756"_mm_tzcnt_64(unsigned long long __X)\n"
13757"{\n"
13758" return __X ? __builtin_ctzll(__X) : 64;\n"
13759"}\n"
13760"\n"
13761"#endif /* __x86_64__ */\n"
13762"\n"
13763"#undef __DEFAULT_FN_ATTRS\n"
13764"#undef __RELAXED_FN_ATTRS\n"
13765"\n"
13766"#endif /* __BMIINTRIN_H */\n"
13767"" } ,
13768 { "/builtins/cetintrin.h" , "/*===---- cetintrin.h - CET intrinsic --------------------------------------===\n"
13769" *\n"
13770" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13771" * of this software and associated documentation files (the \"Software\"), to deal\n"
13772" * in the Software without restriction, including without limitation the rights\n"
13773" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13774" * copies of the Software, and to permit persons to whom the Software is\n"
13775" * furnished to do so, subject to the following conditions:\n"
13776" *\n"
13777" * The above copyright notice and this permission notice shall be included in\n"
13778" * all copies or substantial portions of the Software.\n"
13779" *\n"
13780" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13781" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13782" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13783" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13784" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13785" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13786" * THE SOFTWARE.\n"
13787" *\n"
13788" *===-----------------------------------------------------------------------===\n"
13789" */\n"
13790"\n"
13791"#ifndef __IMMINTRIN_H\n"
13792"#error \"Never use <cetintrin.h> directly; include <immintrin.h> instead.\"\n"
13793"#endif\n"
13794"\n"
13795"#ifndef __CETINTRIN_H\n"
13796"#define __CETINTRIN_H\n"
13797"\n"
13798"/* Define the default attributes for the functions in this file. */\n"
13799"#define __DEFAULT_FN_ATTRS \\\n"
13800" __attribute__((__always_inline__, __nodebug__, __target__(\"shstk\")))\n"
13801"\n"
13802"static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {\n"
13803" __builtin_ia32_incsspd(__a);\n"
13804"}\n"
13805"\n"
13806"#ifdef __x86_64__\n"
13807"static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {\n"
13808" __builtin_ia32_incsspq(__a);\n"
13809"}\n"
13810"#endif /* __x86_64__ */\n"
13811"\n"
13812"#ifdef __x86_64__\n"
13813"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13814" __builtin_ia32_incsspq(__a);\n"
13815"}\n"
13816"#else /* __x86_64__ */\n"
13817"static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n"
13818" __builtin_ia32_incsspd((int)__a);\n"
13819"}\n"
13820"#endif /* __x86_64__ */\n"
13821"\n"
13822"static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {\n"
13823" return __builtin_ia32_rdsspd(__a);\n"
13824"}\n"
13825"\n"
13826"#ifdef __x86_64__\n"
13827"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {\n"
13828" return __builtin_ia32_rdsspq(__a);\n"
13829"}\n"
13830"#endif /* __x86_64__ */\n"
13831"\n"
13832"#ifdef __x86_64__\n"
13833"static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13834" return __builtin_ia32_rdsspq(0);\n"
13835"}\n"
13836"#else /* __x86_64__ */\n"
13837"static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {\n"
13838" return __builtin_ia32_rdsspd(0);\n"
13839"}\n"
13840"#endif /* __x86_64__ */\n"
13841"\n"
13842"static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {\n"
13843" __builtin_ia32_saveprevssp();\n"
13844"}\n"
13845"\n"
13846"static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {\n"
13847" __builtin_ia32_rstorssp(__p);\n"
13848"}\n"
13849"\n"
13850"static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {\n"
13851" __builtin_ia32_wrssd(__a, __p);\n"
13852"}\n"
13853"\n"
13854"#ifdef __x86_64__\n"
13855"static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {\n"
13856" __builtin_ia32_wrssq(__a, __p);\n"
13857"}\n"
13858"#endif /* __x86_64__ */\n"
13859"\n"
13860"static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {\n"
13861" __builtin_ia32_wrussd(__a, __p);\n"
13862"}\n"
13863"\n"
13864"#ifdef __x86_64__\n"
13865"static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {\n"
13866" __builtin_ia32_wrussq(__a, __p);\n"
13867"}\n"
13868"#endif /* __x86_64__ */\n"
13869"\n"
13870"static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {\n"
13871" __builtin_ia32_setssbsy();\n"
13872"}\n"
13873"\n"
13874"static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {\n"
13875" __builtin_ia32_clrssbsy(__p);\n"
13876"}\n"
13877"\n"
13878"#undef __DEFAULT_FN_ATTRS\n"
13879"\n"
13880"#endif /* __CETINTRIN_H */\n"
13881"" } ,
13882 { "/builtins/cldemoteintrin.h" , "/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===\n"
13883" *\n"
13884" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13885" * of this software and associated documentation files (the \"Software\"), to deal\n"
13886" * in the Software without restriction, including without limitation the rights\n"
13887" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13888" * copies of the Software, and to permit persons to whom the Software is\n"
13889" * furnished to do so, subject to the following conditions:\n"
13890" *\n"
13891" * The above copyright notice and this permission notice shall be included in\n"
13892" * all copies or substantial portions of the Software.\n"
13893" *\n"
13894" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13895" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13896" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13897" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13898" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13899" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13900" * THE SOFTWARE.\n"
13901" *\n"
13902" *===-----------------------------------------------------------------------===\n"
13903" */\n"
13904"\n"
13905"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
13906"#error \"Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead.\"\n"
13907"#endif\n"
13908"\n"
13909"#ifndef __CLDEMOTEINTRIN_H\n"
13910"#define __CLDEMOTEINTRIN_H\n"
13911"\n"
13912"/* Define the default attributes for the functions in this file. */\n"
13913"#define __DEFAULT_FN_ATTRS \\\n"
13914" __attribute__((__always_inline__, __nodebug__, __target__(\"cldemote\")))\n"
13915"\n"
13916"static __inline__ void __DEFAULT_FN_ATTRS\n"
13917"_cldemote(const void * __P) {\n"
13918" __builtin_ia32_cldemote(__P);\n"
13919"}\n"
13920"\n"
13921"#undef __DEFAULT_FN_ATTRS\n"
13922"\n"
13923"#endif\n"
13924"" } ,
13925 { "/builtins/clflushoptintrin.h" , "/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===\n"
13926" *\n"
13927" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13928" * of this software and associated documentation files (the \"Software\"), to deal\n"
13929" * in the Software without restriction, including without limitation the rights\n"
13930" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13931" * copies of the Software, and to permit persons to whom the Software is\n"
13932" * furnished to do so, subject to the following conditions:\n"
13933" *\n"
13934" * The above copyright notice and this permission notice shall be included in\n"
13935" * all copies or substantial portions of the Software.\n"
13936" *\n"
13937" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13938" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13939" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13940" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13941" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13942" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13943" * THE SOFTWARE.\n"
13944" *\n"
13945" *===-----------------------------------------------------------------------===\n"
13946" */\n"
13947"\n"
13948"#ifndef __IMMINTRIN_H\n"
13949"#error \"Never use <clflushoptintrin.h> directly; include <immintrin.h> instead.\"\n"
13950"#endif\n"
13951"\n"
13952"#ifndef __CLFLUSHOPTINTRIN_H\n"
13953"#define __CLFLUSHOPTINTRIN_H\n"
13954"\n"
13955"/* Define the default attributes for the functions in this file. */\n"
13956"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clflushopt\")))\n"
13957"\n"
13958"static __inline__ void __DEFAULT_FN_ATTRS\n"
13959"_mm_clflushopt(void const * __m) {\n"
13960" __builtin_ia32_clflushopt(__m);\n"
13961"}\n"
13962"\n"
13963"#undef __DEFAULT_FN_ATTRS\n"
13964"\n"
13965"#endif\n"
13966"" } ,
13967 { "/builtins/clwbintrin.h" , "/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===\n"
13968" *\n"
13969" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
13970" * of this software and associated documentation files (the \"Software\"), to deal\n"
13971" * in the Software without restriction, including without limitation the rights\n"
13972" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
13973" * copies of the Software, and to permit persons to whom the Software is\n"
13974" * furnished to do so, subject to the following conditions:\n"
13975" *\n"
13976" * The above copyright notice and this permission notice shall be included in\n"
13977" * all copies or substantial portions of the Software.\n"
13978" *\n"
13979" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
13980" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
13981" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
13982" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
13983" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
13984" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
13985" * THE SOFTWARE.\n"
13986" *\n"
13987" *===-----------------------------------------------------------------------===\n"
13988" */\n"
13989"\n"
13990"#ifndef __IMMINTRIN_H\n"
13991"#error \"Never use <clwbintrin.h> directly; include <immintrin.h> instead.\"\n"
13992"#endif\n"
13993"\n"
13994"#ifndef __CLWBINTRIN_H\n"
13995"#define __CLWBINTRIN_H\n"
13996"\n"
13997"/* Define the default attributes for the functions in this file. */\n"
13998"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clwb\")))\n"
13999"\n"
14000"/// Writes back to memory the cache line (if modified) that contains the\n"
14001"/// linear address specified in \\a __p from any level of the cache hierarchy in\n"
14002"/// the cache coherence domain\n"
14003"///\n"
14004"/// \\headerfile <immintrin.h>\n"
14005"///\n"
14006"/// This intrinsic corresponds to the <c> CLWB </c> instruction.\n"
14007"///\n"
14008"/// \\param __p\n"
14009"/// A pointer to the memory location used to identify the cache line to be\n"
14010"/// written back.\n"
14011"static __inline__ void __DEFAULT_FN_ATTRS\n"
14012"_mm_clwb(void const *__p) {\n"
14013" __builtin_ia32_clwb(__p);\n"
14014"}\n"
14015"\n"
14016"#undef __DEFAULT_FN_ATTRS\n"
14017"\n"
14018"#endif\n"
14019"" } ,
14020 { "/builtins/clzerointrin.h" , "/*===----------------------- clzerointrin.h - CLZERO ----------------------===\n"
14021" *\n"
14022" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14023" * of this software and associated documentation files (the \"Software\"), to deal\n"
14024" * in the Software without restriction, including without limitation the rights\n"
14025" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14026" * copies of the Software, and to permit persons to whom the Software is\n"
14027" * furnished to do so, subject to the following conditions:\n"
14028" *\n"
14029" * The above copyright notice and this permission notice shall be included in\n"
14030" * all copies or substantial portions of the Software.\n"
14031" *\n"
14032" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14033" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14034" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14035" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14036" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14037" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14038" * THE SOFTWARE.\n"
14039" *\n"
14040" *===-----------------------------------------------------------------------===\n"
14041" */\n"
14042"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
14043"#error \"Never use <clzerointrin.h> directly; include <x86intrin.h> instead.\"\n"
14044"#endif\n"
14045"\n"
14046"#ifndef __CLZEROINTRIN_H\n"
14047"#define __CLZEROINTRIN_H\n"
14048"\n"
14049"/* Define the default attributes for the functions in this file. */\n"
14050"#define __DEFAULT_FN_ATTRS \\\n"
14051" __attribute__((__always_inline__, __nodebug__, __target__(\"clzero\")))\n"
14052"\n"
14053"/// Loads the cache line address and zero's out the cacheline\n"
14054"///\n"
14055"/// \\headerfile <clzerointrin.h>\n"
14056"///\n"
14057"/// This intrinsic corresponds to the <c> CLZERO </c> instruction.\n"
14058"///\n"
14059"/// \\param __line\n"
14060"/// A pointer to a cacheline which needs to be zeroed out.\n"
14061"static __inline__ void __DEFAULT_FN_ATTRS\n"
14062"_mm_clzero (void * __line)\n"
14063"{\n"
14064" __builtin_ia32_clzero ((void *)__line);\n"
14065"}\n"
14066"\n"
14067"#undef __DEFAULT_FN_ATTRS\n"
14068"\n"
14069"#endif /* __CLZEROINTRIN_H */\n"
14070"" } ,
14071 { "/builtins/cpuid.h" , "/*===---- cpuid.h - X86 cpu model detection --------------------------------===\n"
14072" *\n"
14073" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14074" * of this software and associated documentation files (the \"Software\"), to deal\n"
14075" * in the Software without restriction, including without limitation the rights\n"
14076" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14077" * copies of the Software, and to permit persons to whom the Software is\n"
14078" * furnished to do so, subject to the following conditions:\n"
14079" *\n"
14080" * The above copyright notice and this permission notice shall be included in\n"
14081" * all copies or substantial portions of the Software.\n"
14082" *\n"
14083" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14084" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14085" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14086" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14087" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14088" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14089" * THE SOFTWARE.\n"
14090" *\n"
14091" *===-----------------------------------------------------------------------===\n"
14092" */\n"
14093"\n"
14094"#if !(__x86_64__ || __i386__)\n"
14095"#error this header is for x86 only\n"
14096"#endif\n"
14097"\n"
14098"/* Responses identification request with %eax 0 */\n"
14099"/* AMD: \"AuthenticAMD\" */\n"
14100"#define signature_AMD_ebx 0x68747541\n"
14101"#define signature_AMD_edx 0x69746e65\n"
14102"#define signature_AMD_ecx 0x444d4163\n"
14103"/* CENTAUR: \"CentaurHauls\" */\n"
14104"#define signature_CENTAUR_ebx 0x746e6543\n"
14105"#define signature_CENTAUR_edx 0x48727561\n"
14106"#define signature_CENTAUR_ecx 0x736c7561\n"
14107"/* CYRIX: \"CyrixInstead\" */\n"
14108"#define signature_CYRIX_ebx 0x69727943\n"
14109"#define signature_CYRIX_edx 0x736e4978\n"
14110"#define signature_CYRIX_ecx 0x64616574\n"
14111"/* INTEL: \"GenuineIntel\" */\n"
14112"#define signature_INTEL_ebx 0x756e6547\n"
14113"#define signature_INTEL_edx 0x49656e69\n"
14114"#define signature_INTEL_ecx 0x6c65746e\n"
14115"/* TM1: \"TransmetaCPU\" */\n"
14116"#define signature_TM1_ebx 0x6e617254\n"
14117"#define signature_TM1_edx 0x74656d73\n"
14118"#define signature_TM1_ecx 0x55504361\n"
14119"/* TM2: \"GenuineTMx86\" */\n"
14120"#define signature_TM2_ebx 0x756e6547\n"
14121"#define signature_TM2_edx 0x54656e69\n"
14122"#define signature_TM2_ecx 0x3638784d\n"
14123"/* NSC: \"Geode by NSC\" */\n"
14124"#define signature_NSC_ebx 0x646f6547\n"
14125"#define signature_NSC_edx 0x43534e20\n"
14126"#define signature_NSC_ecx 0x79622065\n"
14127"/* NEXGEN: \"NexGenDriven\" */\n"
14128"#define signature_NEXGEN_ebx 0x4778654e\n"
14129"#define signature_NEXGEN_edx 0x72446e65\n"
14130"#define signature_NEXGEN_ecx 0x6e657669\n"
14131"/* RISE: \"RiseRiseRise\" */\n"
14132"#define signature_RISE_ebx 0x65736952\n"
14133"#define signature_RISE_edx 0x65736952\n"
14134"#define signature_RISE_ecx 0x65736952\n"
14135"/* SIS: \"SiS SiS SiS \" */\n"
14136"#define signature_SIS_ebx 0x20536953\n"
14137"#define signature_SIS_edx 0x20536953\n"
14138"#define signature_SIS_ecx 0x20536953\n"
14139"/* UMC: \"UMC UMC UMC \" */\n"
14140"#define signature_UMC_ebx 0x20434d55\n"
14141"#define signature_UMC_edx 0x20434d55\n"
14142"#define signature_UMC_ecx 0x20434d55\n"
14143"/* VIA: \"VIA VIA VIA \" */\n"
14144"#define signature_VIA_ebx 0x20414956\n"
14145"#define signature_VIA_edx 0x20414956\n"
14146"#define signature_VIA_ecx 0x20414956\n"
14147"/* VORTEX: \"Vortex86 SoC\" */\n"
14148"#define signature_VORTEX_ebx 0x74726f56\n"
14149"#define signature_VORTEX_edx 0x36387865\n"
14150"#define signature_VORTEX_ecx 0x436f5320\n"
14151"\n"
14152"/* Features in %ecx for leaf 1 */\n"
14153"#define bit_SSE3 0x00000001\n"
14154"#define bit_PCLMULQDQ 0x00000002\n"
14155"#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */\n"
14156"#define bit_DTES64 0x00000004\n"
14157"#define bit_MONITOR 0x00000008\n"
14158"#define bit_DSCPL 0x00000010\n"
14159"#define bit_VMX 0x00000020\n"
14160"#define bit_SMX 0x00000040\n"
14161"#define bit_EIST 0x00000080\n"
14162"#define bit_TM2 0x00000100\n"
14163"#define bit_SSSE3 0x00000200\n"
14164"#define bit_CNXTID 0x00000400\n"
14165"#define bit_FMA 0x00001000\n"
14166"#define bit_CMPXCHG16B 0x00002000\n"
14167"#define bit_xTPR 0x00004000\n"
14168"#define bit_PDCM 0x00008000\n"
14169"#define bit_PCID 0x00020000\n"
14170"#define bit_DCA 0x00040000\n"
14171"#define bit_SSE41 0x00080000\n"
14172"#define bit_SSE4_1 bit_SSE41 /* for gcc compat */\n"
14173"#define bit_SSE42 0x00100000\n"
14174"#define bit_SSE4_2 bit_SSE42 /* for gcc compat */\n"
14175"#define bit_x2APIC 0x00200000\n"
14176"#define bit_MOVBE 0x00400000\n"
14177"#define bit_POPCNT 0x00800000\n"
14178"#define bit_TSCDeadline 0x01000000\n"
14179"#define bit_AESNI 0x02000000\n"
14180"#define bit_AES bit_AESNI /* for gcc compat */\n"
14181"#define bit_XSAVE 0x04000000\n"
14182"#define bit_OSXSAVE 0x08000000\n"
14183"#define bit_AVX 0x10000000\n"
14184"#define bit_F16C 0x20000000\n"
14185"#define bit_RDRND 0x40000000\n"
14186"\n"
14187"/* Features in %edx for leaf 1 */\n"
14188"#define bit_FPU 0x00000001\n"
14189"#define bit_VME 0x00000002\n"
14190"#define bit_DE 0x00000004\n"
14191"#define bit_PSE 0x00000008\n"
14192"#define bit_TSC 0x00000010\n"
14193"#define bit_MSR 0x00000020\n"
14194"#define bit_PAE 0x00000040\n"
14195"#define bit_MCE 0x00000080\n"
14196"#define bit_CX8 0x00000100\n"
14197"#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */\n"
14198"#define bit_APIC 0x00000200\n"
14199"#define bit_SEP 0x00000800\n"
14200"#define bit_MTRR 0x00001000\n"
14201"#define bit_PGE 0x00002000\n"
14202"#define bit_MCA 0x00004000\n"
14203"#define bit_CMOV 0x00008000\n"
14204"#define bit_PAT 0x00010000\n"
14205"#define bit_PSE36 0x00020000\n"
14206"#define bit_PSN 0x00040000\n"
14207"#define bit_CLFSH 0x00080000\n"
14208"#define bit_DS 0x00200000\n"
14209"#define bit_ACPI 0x00400000\n"
14210"#define bit_MMX 0x00800000\n"
14211"#define bit_FXSR 0x01000000\n"
14212"#define bit_FXSAVE bit_FXSR /* for gcc compat */\n"
14213"#define bit_SSE 0x02000000\n"
14214"#define bit_SSE2 0x04000000\n"
14215"#define bit_SS 0x08000000\n"
14216"#define bit_HTT 0x10000000\n"
14217"#define bit_TM 0x20000000\n"
14218"#define bit_PBE 0x80000000\n"
14219"\n"
14220"/* Features in %ebx for leaf 7 sub-leaf 0 */\n"
14221"#define bit_FSGSBASE 0x00000001\n"
14222"#define bit_SGX 0x00000004\n"
14223"#define bit_BMI 0x00000008\n"
14224"#define bit_HLE 0x00000010\n"
14225"#define bit_AVX2 0x00000020\n"
14226"#define bit_SMEP 0x00000080\n"
14227"#define bit_BMI2 0x00000100\n"
14228"#define bit_ENH_MOVSB 0x00000200\n"
14229"#define bit_INVPCID 0x00000400\n"
14230"#define bit_RTM 0x00000800\n"
14231"#define bit_MPX 0x00004000\n"
14232"#define bit_AVX512F 0x00010000\n"
14233"#define bit_AVX512DQ 0x00020000\n"
14234"#define bit_RDSEED 0x00040000\n"
14235"#define bit_ADX 0x00080000\n"
14236"#define bit_AVX512IFMA 0x00200000\n"
14237"#define bit_CLFLUSHOPT 0x00800000\n"
14238"#define bit_CLWB 0x01000000\n"
14239"#define bit_AVX512PF 0x04000000\n"
14240"#define bit_AVX512ER 0x08000000\n"
14241"#define bit_AVX512CD 0x10000000\n"
14242"#define bit_SHA 0x20000000\n"
14243"#define bit_AVX512BW 0x40000000\n"
14244"#define bit_AVX512VL 0x80000000\n"
14245"\n"
14246"/* Features in %ecx for leaf 7 sub-leaf 0 */\n"
14247"#define bit_PREFTCHWT1 0x00000001\n"
14248"#define bit_AVX512VBMI 0x00000002\n"
14249"#define bit_PKU 0x00000004\n"
14250"#define bit_OSPKE 0x00000010\n"
14251"#define bit_WAITPKG 0x00000020\n"
14252"#define bit_AVX512VBMI2 0x00000040\n"
14253"#define bit_SHSTK 0x00000080\n"
14254"#define bit_GFNI 0x00000100\n"
14255"#define bit_VAES 0x00000200\n"
14256"#define bit_VPCLMULQDQ 0x00000400\n"
14257"#define bit_AVX512VNNI 0x00000800\n"
14258"#define bit_AVX512BITALG 0x00001000\n"
14259"#define bit_AVX512VPOPCNTDQ 0x00004000\n"
14260"#define bit_RDPID 0x00400000\n"
14261"#define bit_CLDEMOTE 0x02000000\n"
14262"#define bit_MOVDIRI 0x08000000\n"
14263"#define bit_MOVDIR64B 0x10000000\n"
14264"\n"
14265"/* Features in %edx for leaf 7 sub-leaf 0 */\n"
14266"#define bit_AVX5124VNNIW 0x00000004\n"
14267"#define bit_AVX5124FMAPS 0x00000008\n"
14268"#define bit_PCONFIG 0x00040000\n"
14269"#define bit_IBT 0x00100000\n"
14270"\n"
14271"/* Features in %eax for leaf 13 sub-leaf 1 */\n"
14272"#define bit_XSAVEOPT 0x00000001\n"
14273"#define bit_XSAVEC 0x00000002\n"
14274"#define bit_XSAVES 0x00000008\n"
14275"\n"
14276"/* Features in %eax for leaf 0x14 sub-leaf 0 */\n"
14277"#define bit_PTWRITE 0x00000010\n"
14278"\n"
14279"/* Features in %ecx for leaf 0x80000001 */\n"
14280"#define bit_LAHF_LM 0x00000001\n"
14281"#define bit_ABM 0x00000020\n"
14282"#define bit_LZCNT bit_ABM /* for gcc compat */\n"
14283"#define bit_SSE4a 0x00000040\n"
14284"#define bit_PRFCHW 0x00000100\n"
14285"#define bit_XOP 0x00000800\n"
14286"#define bit_LWP 0x00008000\n"
14287"#define bit_FMA4 0x00010000\n"
14288"#define bit_TBM 0x00200000\n"
14289"#define bit_MWAITX 0x20000000\n"
14290"\n"
14291"/* Features in %edx for leaf 0x80000001 */\n"
14292"#define bit_MMXEXT 0x00400000\n"
14293"#define bit_LM 0x20000000\n"
14294"#define bit_3DNOWP 0x40000000\n"
14295"#define bit_3DNOW 0x80000000\n"
14296"\n"
14297"/* Features in %ebx for leaf 0x80000008 */\n"
14298"#define bit_CLZERO 0x00000001\n"
14299"#define bit_WBNOINVD 0x00000200\n"
14300"\n"
14301"\n"
14302"#if __i386__\n"
14303"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14304" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14305" : \"0\"(__leaf))\n"
14306"\n"
14307"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14308" __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14309" : \"0\"(__leaf), \"2\"(__count))\n"
14310"#else\n"
14311"/* x86-64 uses %rbx as the base register, so preserve it. */\n"
14312"#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n"
14313" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14314" \" cpuid\\n\" \\\n"
14315" \" xchgq %%rbx,%q1\" \\\n"
14316" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14317" : \"0\"(__leaf))\n"
14318"\n"
14319"#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n"
14320" __asm(\" xchgq %%rbx,%q1\\n\" \\\n"
14321" \" cpuid\\n\" \\\n"
14322" \" xchgq %%rbx,%q1\" \\\n"
14323" : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n"
14324" : \"0\"(__leaf), \"2\"(__count))\n"
14325"#endif\n"
14326"\n"
14327"static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)\n"
14328"{\n"
14329" unsigned int __eax, __ebx, __ecx, __edx;\n"
14330"#if __i386__\n"
14331" int __cpuid_supported;\n"
14332"\n"
14333" __asm(\" pushfl\\n\"\n"
14334" \" popl %%eax\\n\"\n"
14335" \" movl %%eax,%%ecx\\n\"\n"
14336" \" xorl $0x00200000,%%eax\\n\"\n"
14337" \" pushl %%eax\\n\"\n"
14338" \" popfl\\n\"\n"
14339" \" pushfl\\n\"\n"
14340" \" popl %%eax\\n\"\n"
14341" \" movl $0,%0\\n\"\n"
14342" \" cmpl %%eax,%%ecx\\n\"\n"
14343" \" je 1f\\n\"\n"
14344" \" movl $1,%0\\n\"\n"
14345" \"1:\"\n"
14346" : \"=r\" (__cpuid_supported) : : \"eax\", \"ecx\");\n"
14347" if (!__cpuid_supported)\n"
14348" return 0;\n"
14349"#endif\n"
14350"\n"
14351" __cpuid(__leaf, __eax, __ebx, __ecx, __edx);\n"
14352" if (__sig)\n"
14353" *__sig = __ebx;\n"
14354" return __eax;\n"
14355"}\n"
14356"\n"
14357"static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,\n"
14358" unsigned int *__ebx, unsigned int *__ecx,\n"
14359" unsigned int *__edx)\n"
14360"{\n"
14361" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14362"\n"
14363" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14364" return 0;\n"
14365"\n"
14366" __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14367" return 1;\n"
14368"}\n"
14369"\n"
14370"static __inline int __get_cpuid_count (unsigned int __leaf,\n"
14371" unsigned int __subleaf,\n"
14372" unsigned int *__eax, unsigned int *__ebx,\n"
14373" unsigned int *__ecx, unsigned int *__edx)\n"
14374"{\n"
14375" unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n"
14376"\n"
14377" if (__max_leaf == 0 || __max_leaf < __leaf)\n"
14378" return 0;\n"
14379"\n"
14380" __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);\n"
14381" return 1;\n"
14382"}\n"
14383"" } ,
14384 { "/builtins/emmintrin.h" , "/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===\n"
14385" *\n"
14386" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
14387" * of this software and associated documentation files (the \"Software\"), to deal\n"
14388" * in the Software without restriction, including without limitation the rights\n"
14389" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
14390" * copies of the Software, and to permit persons to whom the Software is\n"
14391" * furnished to do so, subject to the following conditions:\n"
14392" *\n"
14393" * The above copyright notice and this permission notice shall be included in\n"
14394" * all copies or substantial portions of the Software.\n"
14395" *\n"
14396" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
14397" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
14398" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
14399" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
14400" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
14401" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
14402" * THE SOFTWARE.\n"
14403" *\n"
14404" *===-----------------------------------------------------------------------===\n"
14405" */\n"
14406"\n"
14407"#ifndef __EMMINTRIN_H\n"
14408"#define __EMMINTRIN_H\n"
14409"\n"
14410"#include <xmmintrin.h>\n"
14411"\n"
14412"typedef double __m128d __attribute__((__vector_size__(16)));\n"
14413"typedef long long __m128i __attribute__((__vector_size__(16)));\n"
14414"\n"
14415"/* Type defines. */\n"
14416"typedef double __v2df __attribute__ ((__vector_size__ (16)));\n"
14417"typedef long long __v2di __attribute__ ((__vector_size__ (16)));\n"
14418"typedef short __v8hi __attribute__((__vector_size__(16)));\n"
14419"typedef char __v16qi __attribute__((__vector_size__(16)));\n"
14420"\n"
14421"/* Unsigned types */\n"
14422"typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));\n"
14423"typedef unsigned short __v8hu __attribute__((__vector_size__(16)));\n"
14424"typedef unsigned char __v16qu __attribute__((__vector_size__(16)));\n"
14425"\n"
14426"/* We need an explicitly signed variant for char. Note that this shouldn't\n"
14427" * appear in the interface though. */\n"
14428"typedef signed char __v16qs __attribute__((__vector_size__(16)));\n"
14429"\n"
14430"/* Define the default attributes for the functions in this file. */\n"
14431"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\"), __min_vector_width__(128)))\n"
14432"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse2\"), __min_vector_width__(64)))\n"
14433"\n"
14434"/// Adds lower double-precision values in both operands and returns the\n"
14435"/// sum in the lower 64 bits of the result. The upper 64 bits of the result\n"
14436"/// are copied from the upper double-precision value of the first operand.\n"
14437"///\n"
14438"/// \\headerfile <x86intrin.h>\n"
14439"///\n"
14440"/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.\n"
14441"///\n"
14442"/// \\param __a\n"
14443"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14444"/// \\param __b\n"
14445"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14446"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14447"/// sum of the lower 64 bits of both operands. The upper 64 bits are copied\n"
14448"/// from the upper 64 bits of the first source operand.\n"
14449"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14450"_mm_add_sd(__m128d __a, __m128d __b)\n"
14451"{\n"
14452" __a[0] += __b[0];\n"
14453" return __a;\n"
14454"}\n"
14455"\n"
14456"/// Adds two 128-bit vectors of [2 x double].\n"
14457"///\n"
14458"/// \\headerfile <x86intrin.h>\n"
14459"///\n"
14460"/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.\n"
14461"///\n"
14462"/// \\param __a\n"
14463"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14464"/// \\param __b\n"
14465"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14466"/// \\returns A 128-bit vector of [2 x double] containing the sums of both\n"
14467"/// operands.\n"
14468"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14469"_mm_add_pd(__m128d __a, __m128d __b)\n"
14470"{\n"
14471" return (__m128d)((__v2df)__a + (__v2df)__b);\n"
14472"}\n"
14473"\n"
14474"/// Subtracts the lower double-precision value of the second operand\n"
14475"/// from the lower double-precision value of the first operand and returns\n"
14476"/// the difference in the lower 64 bits of the result. The upper 64 bits of\n"
14477"/// the result are copied from the upper double-precision value of the first\n"
14478"/// operand.\n"
14479"///\n"
14480"/// \\headerfile <x86intrin.h>\n"
14481"///\n"
14482"/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.\n"
14483"///\n"
14484"/// \\param __a\n"
14485"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14486"/// \\param __b\n"
14487"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14488"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14489"/// difference of the lower 64 bits of both operands. The upper 64 bits are\n"
14490"/// copied from the upper 64 bits of the first source operand.\n"
14491"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14492"_mm_sub_sd(__m128d __a, __m128d __b)\n"
14493"{\n"
14494" __a[0] -= __b[0];\n"
14495" return __a;\n"
14496"}\n"
14497"\n"
14498"/// Subtracts two 128-bit vectors of [2 x double].\n"
14499"///\n"
14500"/// \\headerfile <x86intrin.h>\n"
14501"///\n"
14502"/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.\n"
14503"///\n"
14504"/// \\param __a\n"
14505"/// A 128-bit vector of [2 x double] containing the minuend.\n"
14506"/// \\param __b\n"
14507"/// A 128-bit vector of [2 x double] containing the subtrahend.\n"
14508"/// \\returns A 128-bit vector of [2 x double] containing the differences between\n"
14509"/// both operands.\n"
14510"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14511"_mm_sub_pd(__m128d __a, __m128d __b)\n"
14512"{\n"
14513" return (__m128d)((__v2df)__a - (__v2df)__b);\n"
14514"}\n"
14515"\n"
14516"/// Multiplies lower double-precision values in both operands and returns\n"
14517"/// the product in the lower 64 bits of the result. The upper 64 bits of the\n"
14518"/// result are copied from the upper double-precision value of the first\n"
14519"/// operand.\n"
14520"///\n"
14521"/// \\headerfile <x86intrin.h>\n"
14522"///\n"
14523"/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.\n"
14524"///\n"
14525"/// \\param __a\n"
14526"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14527"/// \\param __b\n"
14528"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14529"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14530"/// product of the lower 64 bits of both operands. The upper 64 bits are\n"
14531"/// copied from the upper 64 bits of the first source operand.\n"
14532"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14533"_mm_mul_sd(__m128d __a, __m128d __b)\n"
14534"{\n"
14535" __a[0] *= __b[0];\n"
14536" return __a;\n"
14537"}\n"
14538"\n"
14539"/// Multiplies two 128-bit vectors of [2 x double].\n"
14540"///\n"
14541"/// \\headerfile <x86intrin.h>\n"
14542"///\n"
14543"/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.\n"
14544"///\n"
14545"/// \\param __a\n"
14546"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14547"/// \\param __b\n"
14548"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14549"/// \\returns A 128-bit vector of [2 x double] containing the products of both\n"
14550"/// operands.\n"
14551"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14552"_mm_mul_pd(__m128d __a, __m128d __b)\n"
14553"{\n"
14554" return (__m128d)((__v2df)__a * (__v2df)__b);\n"
14555"}\n"
14556"\n"
14557"/// Divides the lower double-precision value of the first operand by the\n"
14558"/// lower double-precision value of the second operand and returns the\n"
14559"/// quotient in the lower 64 bits of the result. The upper 64 bits of the\n"
14560"/// result are copied from the upper double-precision value of the first\n"
14561"/// operand.\n"
14562"///\n"
14563"/// \\headerfile <x86intrin.h>\n"
14564"///\n"
14565"/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.\n"
14566"///\n"
14567"/// \\param __a\n"
14568"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14569"/// \\param __b\n"
14570"/// A 128-bit vector of [2 x double] containing divisor.\n"
14571"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14572"/// quotient of the lower 64 bits of both operands. The upper 64 bits are\n"
14573"/// copied from the upper 64 bits of the first source operand.\n"
14574"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14575"_mm_div_sd(__m128d __a, __m128d __b)\n"
14576"{\n"
14577" __a[0] /= __b[0];\n"
14578" return __a;\n"
14579"}\n"
14580"\n"
14581"/// Performs an element-by-element division of two 128-bit vectors of\n"
14582"/// [2 x double].\n"
14583"///\n"
14584"/// \\headerfile <x86intrin.h>\n"
14585"///\n"
14586"/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.\n"
14587"///\n"
14588"/// \\param __a\n"
14589"/// A 128-bit vector of [2 x double] containing the dividend.\n"
14590"/// \\param __b\n"
14591"/// A 128-bit vector of [2 x double] containing the divisor.\n"
14592"/// \\returns A 128-bit vector of [2 x double] containing the quotients of both\n"
14593"/// operands.\n"
14594"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14595"_mm_div_pd(__m128d __a, __m128d __b)\n"
14596"{\n"
14597" return (__m128d)((__v2df)__a / (__v2df)__b);\n"
14598"}\n"
14599"\n"
14600"/// Calculates the square root of the lower double-precision value of\n"
14601"/// the second operand and returns it in the lower 64 bits of the result.\n"
14602"/// The upper 64 bits of the result are copied from the upper\n"
14603"/// double-precision value of the first operand.\n"
14604"///\n"
14605"/// \\headerfile <x86intrin.h>\n"
14606"///\n"
14607"/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.\n"
14608"///\n"
14609"/// \\param __a\n"
14610"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14611"/// upper 64 bits of this operand are copied to the upper 64 bits of the\n"
14612"/// result.\n"
14613"/// \\param __b\n"
14614"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14615"/// square root is calculated using the lower 64 bits of this operand.\n"
14616"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14617"/// square root of the lower 64 bits of operand \\a __b, and whose upper 64\n"
14618"/// bits are copied from the upper 64 bits of operand \\a __a.\n"
14619"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14620"_mm_sqrt_sd(__m128d __a, __m128d __b)\n"
14621"{\n"
14622" __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);\n"
14623" return __extension__ (__m128d) { __c[0], __a[1] };\n"
14624"}\n"
14625"\n"
14626"/// Calculates the square root of the each of two values stored in a\n"
14627"/// 128-bit vector of [2 x double].\n"
14628"///\n"
14629"/// \\headerfile <x86intrin.h>\n"
14630"///\n"
14631"/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.\n"
14632"///\n"
14633"/// \\param __a\n"
14634"/// A 128-bit vector of [2 x double].\n"
14635"/// \\returns A 128-bit vector of [2 x double] containing the square roots of the\n"
14636"/// values in the operand.\n"
14637"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14638"_mm_sqrt_pd(__m128d __a)\n"
14639"{\n"
14640" return __builtin_ia32_sqrtpd((__v2df)__a);\n"
14641"}\n"
14642"\n"
14643"/// Compares lower 64-bit double-precision values of both operands, and\n"
14644"/// returns the lesser of the pair of values in the lower 64-bits of the\n"
14645"/// result. The upper 64 bits of the result are copied from the upper\n"
14646"/// double-precision value of the first operand.\n"
14647"///\n"
14648"/// \\headerfile <x86intrin.h>\n"
14649"///\n"
14650"/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.\n"
14651"///\n"
14652"/// \\param __a\n"
14653"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14654"/// lower 64 bits of this operand are used in the comparison.\n"
14655"/// \\param __b\n"
14656"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14657"/// lower 64 bits of this operand are used in the comparison.\n"
14658"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14659"/// minimum value between both operands. The upper 64 bits are copied from\n"
14660"/// the upper 64 bits of the first source operand.\n"
14661"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14662"_mm_min_sd(__m128d __a, __m128d __b)\n"
14663"{\n"
14664" return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);\n"
14665"}\n"
14666"\n"
14667"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14668"/// [2 x double] and returns the vector containing the lesser of each pair of\n"
14669"/// values.\n"
14670"///\n"
14671"/// \\headerfile <x86intrin.h>\n"
14672"///\n"
14673"/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.\n"
14674"///\n"
14675"/// \\param __a\n"
14676"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14677"/// \\param __b\n"
14678"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14679"/// \\returns A 128-bit vector of [2 x double] containing the minimum values\n"
14680"/// between both operands.\n"
14681"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14682"_mm_min_pd(__m128d __a, __m128d __b)\n"
14683"{\n"
14684" return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);\n"
14685"}\n"
14686"\n"
14687"/// Compares lower 64-bit double-precision values of both operands, and\n"
14688"/// returns the greater of the pair of values in the lower 64-bits of the\n"
14689"/// result. The upper 64 bits of the result are copied from the upper\n"
14690"/// double-precision value of the first operand.\n"
14691"///\n"
14692"/// \\headerfile <x86intrin.h>\n"
14693"///\n"
14694"/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.\n"
14695"///\n"
14696"/// \\param __a\n"
14697"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14698"/// lower 64 bits of this operand are used in the comparison.\n"
14699"/// \\param __b\n"
14700"/// A 128-bit vector of [2 x double] containing one of the operands. The\n"
14701"/// lower 64 bits of this operand are used in the comparison.\n"
14702"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
14703"/// maximum value between both operands. The upper 64 bits are copied from\n"
14704"/// the upper 64 bits of the first source operand.\n"
14705"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14706"_mm_max_sd(__m128d __a, __m128d __b)\n"
14707"{\n"
14708" return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);\n"
14709"}\n"
14710"\n"
14711"/// Performs element-by-element comparison of the two 128-bit vectors of\n"
14712"/// [2 x double] and returns the vector containing the greater of each pair\n"
14713"/// of values.\n"
14714"///\n"
14715"/// \\headerfile <x86intrin.h>\n"
14716"///\n"
14717"/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.\n"
14718"///\n"
14719"/// \\param __a\n"
14720"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14721"/// \\param __b\n"
14722"/// A 128-bit vector of [2 x double] containing one of the operands.\n"
14723"/// \\returns A 128-bit vector of [2 x double] containing the maximum values\n"
14724"/// between both operands.\n"
14725"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14726"_mm_max_pd(__m128d __a, __m128d __b)\n"
14727"{\n"
14728" return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);\n"
14729"}\n"
14730"\n"
14731"/// Performs a bitwise AND of two 128-bit vectors of [2 x double].\n"
14732"///\n"
14733"/// \\headerfile <x86intrin.h>\n"
14734"///\n"
14735"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
14736"///\n"
14737"/// \\param __a\n"
14738"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14739"/// \\param __b\n"
14740"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14741"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14742"/// values between both operands.\n"
14743"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14744"_mm_and_pd(__m128d __a, __m128d __b)\n"
14745"{\n"
14746" return (__m128d)((__v2du)__a & (__v2du)__b);\n"
14747"}\n"
14748"\n"
14749"/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using\n"
14750"/// the one's complement of the values contained in the first source operand.\n"
14751"///\n"
14752"/// \\headerfile <x86intrin.h>\n"
14753"///\n"
14754"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
14755"///\n"
14756"/// \\param __a\n"
14757"/// A 128-bit vector of [2 x double] containing the left source operand. The\n"
14758"/// one's complement of this value is used in the bitwise AND.\n"
14759"/// \\param __b\n"
14760"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
14761"/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n"
14762"/// values in the second operand and the one's complement of the first\n"
14763"/// operand.\n"
14764"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14765"_mm_andnot_pd(__m128d __a, __m128d __b)\n"
14766"{\n"
14767" return (__m128d)(~(__v2du)__a & (__v2du)__b);\n"
14768"}\n"
14769"\n"
14770"/// Performs a bitwise OR of two 128-bit vectors of [2 x double].\n"
14771"///\n"
14772"/// \\headerfile <x86intrin.h>\n"
14773"///\n"
14774"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
14775"///\n"
14776"/// \\param __a\n"
14777"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14778"/// \\param __b\n"
14779"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14780"/// \\returns A 128-bit vector of [2 x double] containing the bitwise OR of the\n"
14781"/// values between both operands.\n"
14782"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14783"_mm_or_pd(__m128d __a, __m128d __b)\n"
14784"{\n"
14785" return (__m128d)((__v2du)__a | (__v2du)__b);\n"
14786"}\n"
14787"\n"
14788"/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].\n"
14789"///\n"
14790"/// \\headerfile <x86intrin.h>\n"
14791"///\n"
14792"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
14793"///\n"
14794"/// \\param __a\n"
14795"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14796"/// \\param __b\n"
14797"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
14798"/// \\returns A 128-bit vector of [2 x double] containing the bitwise XOR of the\n"
14799"/// values between both operands.\n"
14800"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14801"_mm_xor_pd(__m128d __a, __m128d __b)\n"
14802"{\n"
14803" return (__m128d)((__v2du)__a ^ (__v2du)__b);\n"
14804"}\n"
14805"\n"
14806"/// Compares each of the corresponding double-precision values of the\n"
14807"/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0\n"
14808"/// for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14809"///\n"
14810"/// \\headerfile <x86intrin.h>\n"
14811"///\n"
14812"/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.\n"
14813"///\n"
14814"/// \\param __a\n"
14815"/// A 128-bit vector of [2 x double].\n"
14816"/// \\param __b\n"
14817"/// A 128-bit vector of [2 x double].\n"
14818"/// \\returns A 128-bit vector containing the comparison results.\n"
14819"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14820"_mm_cmpeq_pd(__m128d __a, __m128d __b)\n"
14821"{\n"
14822" return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);\n"
14823"}\n"
14824"\n"
14825"/// Compares each of the corresponding double-precision values of the\n"
14826"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14827"/// operand are less than those in the second operand. Each comparison\n"
14828"/// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14829"///\n"
14830"/// \\headerfile <x86intrin.h>\n"
14831"///\n"
14832"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14833"///\n"
14834"/// \\param __a\n"
14835"/// A 128-bit vector of [2 x double].\n"
14836"/// \\param __b\n"
14837"/// A 128-bit vector of [2 x double].\n"
14838"/// \\returns A 128-bit vector containing the comparison results.\n"
14839"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14840"_mm_cmplt_pd(__m128d __a, __m128d __b)\n"
14841"{\n"
14842" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);\n"
14843"}\n"
14844"\n"
14845"/// Compares each of the corresponding double-precision values of the\n"
14846"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14847"/// operand are less than or equal to those in the second operand.\n"
14848"///\n"
14849"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14850"///\n"
14851"/// \\headerfile <x86intrin.h>\n"
14852"///\n"
14853"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14854"///\n"
14855"/// \\param __a\n"
14856"/// A 128-bit vector of [2 x double].\n"
14857"/// \\param __b\n"
14858"/// A 128-bit vector of [2 x double].\n"
14859"/// \\returns A 128-bit vector containing the comparison results.\n"
14860"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14861"_mm_cmple_pd(__m128d __a, __m128d __b)\n"
14862"{\n"
14863" return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);\n"
14864"}\n"
14865"\n"
14866"/// Compares each of the corresponding double-precision values of the\n"
14867"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14868"/// operand are greater than those in the second operand.\n"
14869"///\n"
14870"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14871"///\n"
14872"/// \\headerfile <x86intrin.h>\n"
14873"///\n"
14874"/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n"
14875"///\n"
14876"/// \\param __a\n"
14877"/// A 128-bit vector of [2 x double].\n"
14878"/// \\param __b\n"
14879"/// A 128-bit vector of [2 x double].\n"
14880"/// \\returns A 128-bit vector containing the comparison results.\n"
14881"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14882"_mm_cmpgt_pd(__m128d __a, __m128d __b)\n"
14883"{\n"
14884" return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);\n"
14885"}\n"
14886"\n"
14887"/// Compares each of the corresponding double-precision values of the\n"
14888"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14889"/// operand are greater than or equal to those in the second operand.\n"
14890"///\n"
14891"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14892"///\n"
14893"/// \\headerfile <x86intrin.h>\n"
14894"///\n"
14895"/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n"
14896"///\n"
14897"/// \\param __a\n"
14898"/// A 128-bit vector of [2 x double].\n"
14899"/// \\param __b\n"
14900"/// A 128-bit vector of [2 x double].\n"
14901"/// \\returns A 128-bit vector containing the comparison results.\n"
14902"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14903"_mm_cmpge_pd(__m128d __a, __m128d __b)\n"
14904"{\n"
14905" return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);\n"
14906"}\n"
14907"\n"
14908"/// Compares each of the corresponding double-precision values of the\n"
14909"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14910"/// operand are ordered with respect to those in the second operand.\n"
14911"///\n"
14912"/// A pair of double-precision values are \"ordered\" with respect to each\n"
14913"/// other if neither value is a NaN. Each comparison yields 0x0 for false,\n"
14914"/// 0xFFFFFFFFFFFFFFFF for true.\n"
14915"///\n"
14916"/// \\headerfile <x86intrin.h>\n"
14917"///\n"
14918"/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.\n"
14919"///\n"
14920"/// \\param __a\n"
14921"/// A 128-bit vector of [2 x double].\n"
14922"/// \\param __b\n"
14923"/// A 128-bit vector of [2 x double].\n"
14924"/// \\returns A 128-bit vector containing the comparison results.\n"
14925"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14926"_mm_cmpord_pd(__m128d __a, __m128d __b)\n"
14927"{\n"
14928" return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);\n"
14929"}\n"
14930"\n"
14931"/// Compares each of the corresponding double-precision values of the\n"
14932"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14933"/// operand are unordered with respect to those in the second operand.\n"
14934"///\n"
14935"/// A pair of double-precision values are \"unordered\" with respect to each\n"
14936"/// other if one or both values are NaN. Each comparison yields 0x0 for\n"
14937"/// false, 0xFFFFFFFFFFFFFFFF for true.\n"
14938"///\n"
14939"/// \\headerfile <x86intrin.h>\n"
14940"///\n"
14941"/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>\n"
14942"/// instruction.\n"
14943"///\n"
14944"/// \\param __a\n"
14945"/// A 128-bit vector of [2 x double].\n"
14946"/// \\param __b\n"
14947"/// A 128-bit vector of [2 x double].\n"
14948"/// \\returns A 128-bit vector containing the comparison results.\n"
14949"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14950"_mm_cmpunord_pd(__m128d __a, __m128d __b)\n"
14951"{\n"
14952" return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);\n"
14953"}\n"
14954"\n"
14955"/// Compares each of the corresponding double-precision values of the\n"
14956"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14957"/// operand are unequal to those in the second operand.\n"
14958"///\n"
14959"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14960"///\n"
14961"/// \\headerfile <x86intrin.h>\n"
14962"///\n"
14963"/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.\n"
14964"///\n"
14965"/// \\param __a\n"
14966"/// A 128-bit vector of [2 x double].\n"
14967"/// \\param __b\n"
14968"/// A 128-bit vector of [2 x double].\n"
14969"/// \\returns A 128-bit vector containing the comparison results.\n"
14970"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14971"_mm_cmpneq_pd(__m128d __a, __m128d __b)\n"
14972"{\n"
14973" return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);\n"
14974"}\n"
14975"\n"
14976"/// Compares each of the corresponding double-precision values of the\n"
14977"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14978"/// operand are not less than those in the second operand.\n"
14979"///\n"
14980"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
14981"///\n"
14982"/// \\headerfile <x86intrin.h>\n"
14983"///\n"
14984"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
14985"///\n"
14986"/// \\param __a\n"
14987"/// A 128-bit vector of [2 x double].\n"
14988"/// \\param __b\n"
14989"/// A 128-bit vector of [2 x double].\n"
14990"/// \\returns A 128-bit vector containing the comparison results.\n"
14991"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
14992"_mm_cmpnlt_pd(__m128d __a, __m128d __b)\n"
14993"{\n"
14994" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);\n"
14995"}\n"
14996"\n"
14997"/// Compares each of the corresponding double-precision values of the\n"
14998"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
14999"/// operand are not less than or equal to those in the second operand.\n"
15000"///\n"
15001"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15002"///\n"
15003"/// \\headerfile <x86intrin.h>\n"
15004"///\n"
15005"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15006"///\n"
15007"/// \\param __a\n"
15008"/// A 128-bit vector of [2 x double].\n"
15009"/// \\param __b\n"
15010"/// A 128-bit vector of [2 x double].\n"
15011"/// \\returns A 128-bit vector containing the comparison results.\n"
15012"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15013"_mm_cmpnle_pd(__m128d __a, __m128d __b)\n"
15014"{\n"
15015" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);\n"
15016"}\n"
15017"\n"
15018"/// Compares each of the corresponding double-precision values of the\n"
15019"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15020"/// operand are not greater than those in the second operand.\n"
15021"///\n"
15022"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15023"///\n"
15024"/// \\headerfile <x86intrin.h>\n"
15025"///\n"
15026"/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n"
15027"///\n"
15028"/// \\param __a\n"
15029"/// A 128-bit vector of [2 x double].\n"
15030"/// \\param __b\n"
15031"/// A 128-bit vector of [2 x double].\n"
15032"/// \\returns A 128-bit vector containing the comparison results.\n"
15033"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15034"_mm_cmpngt_pd(__m128d __a, __m128d __b)\n"
15035"{\n"
15036" return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);\n"
15037"}\n"
15038"\n"
15039"/// Compares each of the corresponding double-precision values of the\n"
15040"/// 128-bit vectors of [2 x double] to determine if the values in the first\n"
15041"/// operand are not greater than or equal to those in the second operand.\n"
15042"///\n"
15043"/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15044"///\n"
15045"/// \\headerfile <x86intrin.h>\n"
15046"///\n"
15047"/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n"
15048"///\n"
15049"/// \\param __a\n"
15050"/// A 128-bit vector of [2 x double].\n"
15051"/// \\param __b\n"
15052"/// A 128-bit vector of [2 x double].\n"
15053"/// \\returns A 128-bit vector containing the comparison results.\n"
15054"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15055"_mm_cmpnge_pd(__m128d __a, __m128d __b)\n"
15056"{\n"
15057" return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);\n"
15058"}\n"
15059"\n"
15060"/// Compares the lower double-precision floating-point values in each of\n"
15061"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15062"///\n"
15063"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15064"///\n"
15065"/// \\headerfile <x86intrin.h>\n"
15066"///\n"
15067"/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.\n"
15068"///\n"
15069"/// \\param __a\n"
15070"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15071"/// compared to the lower double-precision value of \\a __b.\n"
15072"/// \\param __b\n"
15073"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15074"/// compared to the lower double-precision value of \\a __a.\n"
15075"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15076"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15077"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15078"_mm_cmpeq_sd(__m128d __a, __m128d __b)\n"
15079"{\n"
15080" return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);\n"
15081"}\n"
15082"\n"
15083"/// Compares the lower double-precision floating-point values in each of\n"
15084"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15085"/// the value in the first parameter is less than the corresponding value in\n"
15086"/// the second parameter.\n"
15087"///\n"
15088"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15089"///\n"
15090"/// \\headerfile <x86intrin.h>\n"
15091"///\n"
15092"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15093"///\n"
15094"/// \\param __a\n"
15095"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15096"/// compared to the lower double-precision value of \\a __b.\n"
15097"/// \\param __b\n"
15098"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15099"/// compared to the lower double-precision value of \\a __a.\n"
15100"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15101"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15102"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15103"_mm_cmplt_sd(__m128d __a, __m128d __b)\n"
15104"{\n"
15105" return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);\n"
15106"}\n"
15107"\n"
15108"/// Compares the lower double-precision floating-point values in each of\n"
15109"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15110"/// the value in the first parameter is less than or equal to the\n"
15111"/// corresponding value in the second parameter.\n"
15112"///\n"
15113"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15114"///\n"
15115"/// \\headerfile <x86intrin.h>\n"
15116"///\n"
15117"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15118"///\n"
15119"/// \\param __a\n"
15120"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15121"/// compared to the lower double-precision value of \\a __b.\n"
15122"/// \\param __b\n"
15123"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15124"/// compared to the lower double-precision value of \\a __a.\n"
15125"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15126"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15127"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15128"_mm_cmple_sd(__m128d __a, __m128d __b)\n"
15129"{\n"
15130" return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);\n"
15131"}\n"
15132"\n"
15133"/// Compares the lower double-precision floating-point values in each of\n"
15134"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15135"/// the value in the first parameter is greater than the corresponding value\n"
15136"/// in the second parameter.\n"
15137"///\n"
15138"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15139"///\n"
15140"/// \\headerfile <x86intrin.h>\n"
15141"///\n"
15142"/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n"
15143"///\n"
15144"/// \\param __a\n"
15145"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15146"/// compared to the lower double-precision value of \\a __b.\n"
15147"/// \\param __b\n"
15148"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15149"/// compared to the lower double-precision value of \\a __a.\n"
15150"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15151"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15152"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15153"_mm_cmpgt_sd(__m128d __a, __m128d __b)\n"
15154"{\n"
15155" __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);\n"
15156" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15157"}\n"
15158"\n"
15159"/// Compares the lower double-precision floating-point values in each of\n"
15160"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15161"/// the value in the first parameter is greater than or equal to the\n"
15162"/// corresponding value in the second parameter.\n"
15163"///\n"
15164"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15165"///\n"
15166"/// \\headerfile <x86intrin.h>\n"
15167"///\n"
15168"/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n"
15169"///\n"
15170"/// \\param __a\n"
15171"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15172"/// compared to the lower double-precision value of \\a __b.\n"
15173"/// \\param __b\n"
15174"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15175"/// compared to the lower double-precision value of \\a __a.\n"
15176"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15177"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15178"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15179"_mm_cmpge_sd(__m128d __a, __m128d __b)\n"
15180"{\n"
15181" __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);\n"
15182" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15183"}\n"
15184"\n"
15185"/// Compares the lower double-precision floating-point values in each of\n"
15186"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15187"/// the value in the first parameter is \"ordered\" with respect to the\n"
15188"/// corresponding value in the second parameter.\n"
15189"///\n"
15190"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15191"/// of double-precision values are \"ordered\" with respect to each other if\n"
15192"/// neither value is a NaN.\n"
15193"///\n"
15194"/// \\headerfile <x86intrin.h>\n"
15195"///\n"
15196"/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.\n"
15197"///\n"
15198"/// \\param __a\n"
15199"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15200"/// compared to the lower double-precision value of \\a __b.\n"
15201"/// \\param __b\n"
15202"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15203"/// compared to the lower double-precision value of \\a __a.\n"
15204"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15205"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15206"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15207"_mm_cmpord_sd(__m128d __a, __m128d __b)\n"
15208"{\n"
15209" return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);\n"
15210"}\n"
15211"\n"
15212"/// Compares the lower double-precision floating-point values in each of\n"
15213"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15214"/// the value in the first parameter is \"unordered\" with respect to the\n"
15215"/// corresponding value in the second parameter.\n"
15216"///\n"
15217"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n"
15218"/// of double-precision values are \"unordered\" with respect to each other if\n"
15219"/// one or both values are NaN.\n"
15220"///\n"
15221"/// \\headerfile <x86intrin.h>\n"
15222"///\n"
15223"/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>\n"
15224"/// instruction.\n"
15225"///\n"
15226"/// \\param __a\n"
15227"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15228"/// compared to the lower double-precision value of \\a __b.\n"
15229"/// \\param __b\n"
15230"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15231"/// compared to the lower double-precision value of \\a __a.\n"
15232"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15233"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15234"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15235"_mm_cmpunord_sd(__m128d __a, __m128d __b)\n"
15236"{\n"
15237" return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);\n"
15238"}\n"
15239"\n"
15240"/// Compares the lower double-precision floating-point values in each of\n"
15241"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15242"/// the value in the first parameter is unequal to the corresponding value in\n"
15243"/// the second parameter.\n"
15244"///\n"
15245"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15246"///\n"
15247"/// \\headerfile <x86intrin.h>\n"
15248"///\n"
15249"/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.\n"
15250"///\n"
15251"/// \\param __a\n"
15252"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15253"/// compared to the lower double-precision value of \\a __b.\n"
15254"/// \\param __b\n"
15255"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15256"/// compared to the lower double-precision value of \\a __a.\n"
15257"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15258"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15259"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15260"_mm_cmpneq_sd(__m128d __a, __m128d __b)\n"
15261"{\n"
15262" return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);\n"
15263"}\n"
15264"\n"
15265"/// Compares the lower double-precision floating-point values in each of\n"
15266"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15267"/// the value in the first parameter is not less than the corresponding\n"
15268"/// value in the second parameter.\n"
15269"///\n"
15270"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15271"///\n"
15272"/// \\headerfile <x86intrin.h>\n"
15273"///\n"
15274"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15275"///\n"
15276"/// \\param __a\n"
15277"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15278"/// compared to the lower double-precision value of \\a __b.\n"
15279"/// \\param __b\n"
15280"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15281"/// compared to the lower double-precision value of \\a __a.\n"
15282"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15283"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15284"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15285"_mm_cmpnlt_sd(__m128d __a, __m128d __b)\n"
15286"{\n"
15287" return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);\n"
15288"}\n"
15289"\n"
15290"/// Compares the lower double-precision floating-point values in each of\n"
15291"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15292"/// the value in the first parameter is not less than or equal to the\n"
15293"/// corresponding value in the second parameter.\n"
15294"///\n"
15295"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15296"///\n"
15297"/// \\headerfile <x86intrin.h>\n"
15298"///\n"
15299"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15300"///\n"
15301"/// \\param __a\n"
15302"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15303"/// compared to the lower double-precision value of \\a __b.\n"
15304"/// \\param __b\n"
15305"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15306"/// compared to the lower double-precision value of \\a __a.\n"
15307"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15308"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15309"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15310"_mm_cmpnle_sd(__m128d __a, __m128d __b)\n"
15311"{\n"
15312" return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);\n"
15313"}\n"
15314"\n"
15315"/// Compares the lower double-precision floating-point values in each of\n"
15316"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15317"/// the value in the first parameter is not greater than the corresponding\n"
15318"/// value in the second parameter.\n"
15319"///\n"
15320"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15321"///\n"
15322"/// \\headerfile <x86intrin.h>\n"
15323"///\n"
15324"/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n"
15325"///\n"
15326"/// \\param __a\n"
15327"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15328"/// compared to the lower double-precision value of \\a __b.\n"
15329"/// \\param __b\n"
15330"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15331"/// compared to the lower double-precision value of \\a __a.\n"
15332"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15333"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15334"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15335"_mm_cmpngt_sd(__m128d __a, __m128d __b)\n"
15336"{\n"
15337" __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);\n"
15338" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15339"}\n"
15340"\n"
15341"/// Compares the lower double-precision floating-point values in each of\n"
15342"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15343"/// the value in the first parameter is not greater than or equal to the\n"
15344"/// corresponding value in the second parameter.\n"
15345"///\n"
15346"/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n"
15347"///\n"
15348"/// \\headerfile <x86intrin.h>\n"
15349"///\n"
15350"/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n"
15351"///\n"
15352"/// \\param __a\n"
15353"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15354"/// compared to the lower double-precision value of \\a __b.\n"
15355"/// \\param __b\n"
15356"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15357"/// compared to the lower double-precision value of \\a __a.\n"
15358"/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n"
15359"/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n"
15360"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15361"_mm_cmpnge_sd(__m128d __a, __m128d __b)\n"
15362"{\n"
15363" __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);\n"
15364" return __extension__ (__m128d) { __c[0], __a[1] };\n"
15365"}\n"
15366"\n"
15367"/// Compares the lower double-precision floating-point values in each of\n"
15368"/// the two 128-bit floating-point vectors of [2 x double] for equality.\n"
15369"///\n"
15370"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15371"/// lower double-precision values is NaN, 0 is returned.\n"
15372"///\n"
15373"/// \\headerfile <x86intrin.h>\n"
15374"///\n"
15375"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15376"///\n"
15377"/// \\param __a\n"
15378"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15379"/// compared to the lower double-precision value of \\a __b.\n"
15380"/// \\param __b\n"
15381"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15382"/// compared to the lower double-precision value of \\a __a.\n"
15383"/// \\returns An integer containing the comparison results. If either of the two\n"
15384"/// lower double-precision values is NaN, 0 is returned.\n"
15385"static __inline__ int __DEFAULT_FN_ATTRS\n"
15386"_mm_comieq_sd(__m128d __a, __m128d __b)\n"
15387"{\n"
15388" return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);\n"
15389"}\n"
15390"\n"
15391"/// Compares the lower double-precision floating-point values in each of\n"
15392"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15393"/// the value in the first parameter is less than the corresponding value in\n"
15394"/// the second parameter.\n"
15395"///\n"
15396"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15397"/// lower double-precision values is NaN, 0 is returned.\n"
15398"///\n"
15399"/// \\headerfile <x86intrin.h>\n"
15400"///\n"
15401"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15402"///\n"
15403"/// \\param __a\n"
15404"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15405"/// compared to the lower double-precision value of \\a __b.\n"
15406"/// \\param __b\n"
15407"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15408"/// compared to the lower double-precision value of \\a __a.\n"
15409"/// \\returns An integer containing the comparison results. If either of the two\n"
15410"/// lower double-precision values is NaN, 0 is returned.\n"
15411"static __inline__ int __DEFAULT_FN_ATTRS\n"
15412"_mm_comilt_sd(__m128d __a, __m128d __b)\n"
15413"{\n"
15414" return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);\n"
15415"}\n"
15416"\n"
15417"/// Compares the lower double-precision floating-point values in each of\n"
15418"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15419"/// the value in the first parameter is less than or equal to the\n"
15420"/// corresponding value in the second parameter.\n"
15421"///\n"
15422"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15423"/// lower double-precision values is NaN, 0 is returned.\n"
15424"///\n"
15425"/// \\headerfile <x86intrin.h>\n"
15426"///\n"
15427"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15428"///\n"
15429"/// \\param __a\n"
15430"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15431"/// compared to the lower double-precision value of \\a __b.\n"
15432"/// \\param __b\n"
15433"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15434"/// compared to the lower double-precision value of \\a __a.\n"
15435"/// \\returns An integer containing the comparison results. If either of the two\n"
15436"/// lower double-precision values is NaN, 0 is returned.\n"
15437"static __inline__ int __DEFAULT_FN_ATTRS\n"
15438"_mm_comile_sd(__m128d __a, __m128d __b)\n"
15439"{\n"
15440" return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);\n"
15441"}\n"
15442"\n"
15443"/// Compares the lower double-precision floating-point values in each of\n"
15444"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15445"/// the value in the first parameter is greater than the corresponding value\n"
15446"/// in the second parameter.\n"
15447"///\n"
15448"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15449"/// lower double-precision values is NaN, 0 is returned.\n"
15450"///\n"
15451"/// \\headerfile <x86intrin.h>\n"
15452"///\n"
15453"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15454"///\n"
15455"/// \\param __a\n"
15456"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15457"/// compared to the lower double-precision value of \\a __b.\n"
15458"/// \\param __b\n"
15459"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15460"/// compared to the lower double-precision value of \\a __a.\n"
15461"/// \\returns An integer containing the comparison results. If either of the two\n"
15462"/// lower double-precision values is NaN, 0 is returned.\n"
15463"static __inline__ int __DEFAULT_FN_ATTRS\n"
15464"_mm_comigt_sd(__m128d __a, __m128d __b)\n"
15465"{\n"
15466" return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);\n"
15467"}\n"
15468"\n"
15469"/// Compares the lower double-precision floating-point values in each of\n"
15470"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15471"/// the value in the first parameter is greater than or equal to the\n"
15472"/// corresponding value in the second parameter.\n"
15473"///\n"
15474"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15475"/// lower double-precision values is NaN, 0 is returned.\n"
15476"///\n"
15477"/// \\headerfile <x86intrin.h>\n"
15478"///\n"
15479"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15480"///\n"
15481"/// \\param __a\n"
15482"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15483"/// compared to the lower double-precision value of \\a __b.\n"
15484"/// \\param __b\n"
15485"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15486"/// compared to the lower double-precision value of \\a __a.\n"
15487"/// \\returns An integer containing the comparison results. If either of the two\n"
15488"/// lower double-precision values is NaN, 0 is returned.\n"
15489"static __inline__ int __DEFAULT_FN_ATTRS\n"
15490"_mm_comige_sd(__m128d __a, __m128d __b)\n"
15491"{\n"
15492" return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);\n"
15493"}\n"
15494"\n"
15495"/// Compares the lower double-precision floating-point values in each of\n"
15496"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15497"/// the value in the first parameter is unequal to the corresponding value in\n"
15498"/// the second parameter.\n"
15499"///\n"
15500"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15501"/// lower double-precision values is NaN, 1 is returned.\n"
15502"///\n"
15503"/// \\headerfile <x86intrin.h>\n"
15504"///\n"
15505"/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n"
15506"///\n"
15507"/// \\param __a\n"
15508"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15509"/// compared to the lower double-precision value of \\a __b.\n"
15510"/// \\param __b\n"
15511"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15512"/// compared to the lower double-precision value of \\a __a.\n"
15513"/// \\returns An integer containing the comparison results. If either of the two\n"
15514"/// lower double-precision values is NaN, 1 is returned.\n"
15515"static __inline__ int __DEFAULT_FN_ATTRS\n"
15516"_mm_comineq_sd(__m128d __a, __m128d __b)\n"
15517"{\n"
15518" return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);\n"
15519"}\n"
15520"\n"
15521"/// Compares the lower double-precision floating-point values in each of\n"
15522"/// the two 128-bit floating-point vectors of [2 x double] for equality. The\n"
15523"/// comparison yields 0 for false, 1 for true.\n"
15524"///\n"
15525"/// If either of the two lower double-precision values is NaN, 0 is returned.\n"
15526"///\n"
15527"/// \\headerfile <x86intrin.h>\n"
15528"///\n"
15529"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15530"///\n"
15531"/// \\param __a\n"
15532"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15533"/// compared to the lower double-precision value of \\a __b.\n"
15534"/// \\param __b\n"
15535"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15536"/// compared to the lower double-precision value of \\a __a.\n"
15537"/// \\returns An integer containing the comparison results. If either of the two\n"
15538"/// lower double-precision values is NaN, 0 is returned.\n"
15539"static __inline__ int __DEFAULT_FN_ATTRS\n"
15540"_mm_ucomieq_sd(__m128d __a, __m128d __b)\n"
15541"{\n"
15542" return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);\n"
15543"}\n"
15544"\n"
15545"/// Compares the lower double-precision floating-point values in each of\n"
15546"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15547"/// the value in the first parameter is less than the corresponding value in\n"
15548"/// the second parameter.\n"
15549"///\n"
15550"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15551"/// double-precision values is NaN, 0 is returned.\n"
15552"///\n"
15553"/// \\headerfile <x86intrin.h>\n"
15554"///\n"
15555"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15556"///\n"
15557"/// \\param __a\n"
15558"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15559"/// compared to the lower double-precision value of \\a __b.\n"
15560"/// \\param __b\n"
15561"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15562"/// compared to the lower double-precision value of \\a __a.\n"
15563"/// \\returns An integer containing the comparison results. If either of the two\n"
15564"/// lower double-precision values is NaN, 0 is returned.\n"
15565"static __inline__ int __DEFAULT_FN_ATTRS\n"
15566"_mm_ucomilt_sd(__m128d __a, __m128d __b)\n"
15567"{\n"
15568" return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);\n"
15569"}\n"
15570"\n"
15571"/// Compares the lower double-precision floating-point values in each of\n"
15572"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15573"/// the value in the first parameter is less than or equal to the\n"
15574"/// corresponding value in the second parameter.\n"
15575"///\n"
15576"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15577"/// double-precision values is NaN, 0 is returned.\n"
15578"///\n"
15579"/// \\headerfile <x86intrin.h>\n"
15580"///\n"
15581"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15582"///\n"
15583"/// \\param __a\n"
15584"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15585"/// compared to the lower double-precision value of \\a __b.\n"
15586"/// \\param __b\n"
15587"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15588"/// compared to the lower double-precision value of \\a __a.\n"
15589"/// \\returns An integer containing the comparison results. If either of the two\n"
15590"/// lower double-precision values is NaN, 0 is returned.\n"
15591"static __inline__ int __DEFAULT_FN_ATTRS\n"
15592"_mm_ucomile_sd(__m128d __a, __m128d __b)\n"
15593"{\n"
15594" return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);\n"
15595"}\n"
15596"\n"
15597"/// Compares the lower double-precision floating-point values in each of\n"
15598"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15599"/// the value in the first parameter is greater than the corresponding value\n"
15600"/// in the second parameter.\n"
15601"///\n"
15602"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15603"/// double-precision values is NaN, 0 is returned.\n"
15604"///\n"
15605"/// \\headerfile <x86intrin.h>\n"
15606"///\n"
15607"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15608"///\n"
15609"/// \\param __a\n"
15610"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15611"/// compared to the lower double-precision value of \\a __b.\n"
15612"/// \\param __b\n"
15613"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15614"/// compared to the lower double-precision value of \\a __a.\n"
15615"/// \\returns An integer containing the comparison results. If either of the two\n"
15616"/// lower double-precision values is NaN, 0 is returned.\n"
15617"static __inline__ int __DEFAULT_FN_ATTRS\n"
15618"_mm_ucomigt_sd(__m128d __a, __m128d __b)\n"
15619"{\n"
15620" return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);\n"
15621"}\n"
15622"\n"
15623"/// Compares the lower double-precision floating-point values in each of\n"
15624"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15625"/// the value in the first parameter is greater than or equal to the\n"
15626"/// corresponding value in the second parameter.\n"
15627"///\n"
15628"/// The comparison yields 0 for false, 1 for true. If either of the two\n"
15629"/// lower double-precision values is NaN, 0 is returned.\n"
15630"///\n"
15631"/// \\headerfile <x86intrin.h>\n"
15632"///\n"
15633"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15634"///\n"
15635"/// \\param __a\n"
15636"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15637"/// compared to the lower double-precision value of \\a __b.\n"
15638"/// \\param __b\n"
15639"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15640"/// compared to the lower double-precision value of \\a __a.\n"
15641"/// \\returns An integer containing the comparison results. If either of the two\n"
15642"/// lower double-precision values is NaN, 0 is returned.\n"
15643"static __inline__ int __DEFAULT_FN_ATTRS\n"
15644"_mm_ucomige_sd(__m128d __a, __m128d __b)\n"
15645"{\n"
15646" return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);\n"
15647"}\n"
15648"\n"
15649"/// Compares the lower double-precision floating-point values in each of\n"
15650"/// the two 128-bit floating-point vectors of [2 x double] to determine if\n"
15651"/// the value in the first parameter is unequal to the corresponding value in\n"
15652"/// the second parameter.\n"
15653"///\n"
15654"/// The comparison yields 0 for false, 1 for true. If either of the two lower\n"
15655"/// double-precision values is NaN, 1 is returned.\n"
15656"///\n"
15657"/// \\headerfile <x86intrin.h>\n"
15658"///\n"
15659"/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n"
15660"///\n"
15661"/// \\param __a\n"
15662"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15663"/// compared to the lower double-precision value of \\a __b.\n"
15664"/// \\param __b\n"
15665"/// A 128-bit vector of [2 x double]. The lower double-precision value is\n"
15666"/// compared to the lower double-precision value of \\a __a.\n"
15667"/// \\returns An integer containing the comparison result. If either of the two\n"
15668"/// lower double-precision values is NaN, 1 is returned.\n"
15669"static __inline__ int __DEFAULT_FN_ATTRS\n"
15670"_mm_ucomineq_sd(__m128d __a, __m128d __b)\n"
15671"{\n"
15672" return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);\n"
15673"}\n"
15674"\n"
15675"/// Converts the two double-precision floating-point elements of a\n"
15676"/// 128-bit vector of [2 x double] into two single-precision floating-point\n"
15677"/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].\n"
15678"/// The upper 64 bits of the result vector are set to zero.\n"
15679"///\n"
15680"/// \\headerfile <x86intrin.h>\n"
15681"///\n"
15682"/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.\n"
15683"///\n"
15684"/// \\param __a\n"
15685"/// A 128-bit vector of [2 x double].\n"
15686"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
15687"/// converted values. The upper 64 bits are set to zero.\n"
15688"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15689"_mm_cvtpd_ps(__m128d __a)\n"
15690"{\n"
15691" return __builtin_ia32_cvtpd2ps((__v2df)__a);\n"
15692"}\n"
15693"\n"
15694"/// Converts the lower two single-precision floating-point elements of a\n"
15695"/// 128-bit vector of [4 x float] into two double-precision floating-point\n"
15696"/// values, returned in a 128-bit vector of [2 x double]. The upper two\n"
15697"/// elements of the input vector are unused.\n"
15698"///\n"
15699"/// \\headerfile <x86intrin.h>\n"
15700"///\n"
15701"/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.\n"
15702"///\n"
15703"/// \\param __a\n"
15704"/// A 128-bit vector of [4 x float]. The lower two single-precision\n"
15705"/// floating-point elements are converted to double-precision values. The\n"
15706"/// upper two elements are unused.\n"
15707"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15708"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15709"_mm_cvtps_pd(__m128 __a)\n"
15710"{\n"
15711" return (__m128d) __builtin_convertvector(\n"
15712" __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);\n"
15713"}\n"
15714"\n"
15715"/// Converts the lower two integer elements of a 128-bit vector of\n"
15716"/// [4 x i32] into two double-precision floating-point values, returned in a\n"
15717"/// 128-bit vector of [2 x double].\n"
15718"///\n"
15719"/// The upper two elements of the input vector are unused.\n"
15720"///\n"
15721"/// \\headerfile <x86intrin.h>\n"
15722"///\n"
15723"/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.\n"
15724"///\n"
15725"/// \\param __a\n"
15726"/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are\n"
15727"/// converted to double-precision values.\n"
15728"///\n"
15729"/// The upper two elements are unused.\n"
15730"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15731"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15732"_mm_cvtepi32_pd(__m128i __a)\n"
15733"{\n"
15734" return (__m128d) __builtin_convertvector(\n"
15735" __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);\n"
15736"}\n"
15737"\n"
15738"/// Converts the two double-precision floating-point elements of a\n"
15739"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15740"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper\n"
15741"/// 64 bits of the result vector are set to zero.\n"
15742"///\n"
15743"/// \\headerfile <x86intrin.h>\n"
15744"///\n"
15745"/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.\n"
15746"///\n"
15747"/// \\param __a\n"
15748"/// A 128-bit vector of [2 x double].\n"
15749"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15750"/// converted values. The upper 64 bits are set to zero.\n"
15751"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15752"_mm_cvtpd_epi32(__m128d __a)\n"
15753"{\n"
15754" return __builtin_ia32_cvtpd2dq((__v2df)__a);\n"
15755"}\n"
15756"\n"
15757"/// Converts the low-order element of a 128-bit vector of [2 x double]\n"
15758"/// into a 32-bit signed integer value.\n"
15759"///\n"
15760"/// \\headerfile <x86intrin.h>\n"
15761"///\n"
15762"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
15763"///\n"
15764"/// \\param __a\n"
15765"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15766"/// conversion.\n"
15767"/// \\returns A 32-bit signed integer containing the converted value.\n"
15768"static __inline__ int __DEFAULT_FN_ATTRS\n"
15769"_mm_cvtsd_si32(__m128d __a)\n"
15770"{\n"
15771" return __builtin_ia32_cvtsd2si((__v2df)__a);\n"
15772"}\n"
15773"\n"
15774"/// Converts the lower double-precision floating-point element of a\n"
15775"/// 128-bit vector of [2 x double], in the second parameter, into a\n"
15776"/// single-precision floating-point value, returned in the lower 32 bits of a\n"
15777"/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are\n"
15778"/// copied from the upper 96 bits of the first parameter.\n"
15779"///\n"
15780"/// \\headerfile <x86intrin.h>\n"
15781"///\n"
15782"/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.\n"
15783"///\n"
15784"/// \\param __a\n"
15785"/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are\n"
15786"/// copied to the upper 96 bits of the result.\n"
15787"/// \\param __b\n"
15788"/// A 128-bit vector of [2 x double]. The lower double-precision\n"
15789"/// floating-point element is used in the conversion.\n"
15790"/// \\returns A 128-bit vector of [4 x float]. The lower 32 bits contain the\n"
15791"/// converted value from the second parameter. The upper 96 bits are copied\n"
15792"/// from the upper 96 bits of the first parameter.\n"
15793"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
15794"_mm_cvtsd_ss(__m128 __a, __m128d __b)\n"
15795"{\n"
15796" return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);\n"
15797"}\n"
15798"\n"
15799"/// Converts a 32-bit signed integer value, in the second parameter, into\n"
15800"/// a double-precision floating-point value, returned in the lower 64 bits of\n"
15801"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15802"/// are copied from the upper 64 bits of the first parameter.\n"
15803"///\n"
15804"/// \\headerfile <x86intrin.h>\n"
15805"///\n"
15806"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
15807"///\n"
15808"/// \\param __a\n"
15809"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15810"/// copied to the upper 64 bits of the result.\n"
15811"/// \\param __b\n"
15812"/// A 32-bit signed integer containing the value to be converted.\n"
15813"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15814"/// converted value from the second parameter. The upper 64 bits are copied\n"
15815"/// from the upper 64 bits of the first parameter.\n"
15816"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15817"_mm_cvtsi32_sd(__m128d __a, int __b)\n"
15818"{\n"
15819" __a[0] = __b;\n"
15820" return __a;\n"
15821"}\n"
15822"\n"
15823"/// Converts the lower single-precision floating-point element of a\n"
15824"/// 128-bit vector of [4 x float], in the second parameter, into a\n"
15825"/// double-precision floating-point value, returned in the lower 64 bits of\n"
15826"/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n"
15827"/// are copied from the upper 64 bits of the first parameter.\n"
15828"///\n"
15829"/// \\headerfile <x86intrin.h>\n"
15830"///\n"
15831"/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.\n"
15832"///\n"
15833"/// \\param __a\n"
15834"/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n"
15835"/// copied to the upper 64 bits of the result.\n"
15836"/// \\param __b\n"
15837"/// A 128-bit vector of [4 x float]. The lower single-precision\n"
15838"/// floating-point element is used in the conversion.\n"
15839"/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n"
15840"/// converted value from the second parameter. The upper 64 bits are copied\n"
15841"/// from the upper 64 bits of the first parameter.\n"
15842"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15843"_mm_cvtss_sd(__m128d __a, __m128 __b)\n"
15844"{\n"
15845" __a[0] = __b[0];\n"
15846" return __a;\n"
15847"}\n"
15848"\n"
15849"/// Converts the two double-precision floating-point elements of a\n"
15850"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15851"/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].\n"
15852"///\n"
15853"/// If the result of either conversion is inexact, the result is truncated\n"
15854"/// (rounded towards zero) regardless of the current MXCSR setting. The upper\n"
15855"/// 64 bits of the result vector are set to zero.\n"
15856"///\n"
15857"/// \\headerfile <x86intrin.h>\n"
15858"///\n"
15859"/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>\n"
15860"/// instruction.\n"
15861"///\n"
15862"/// \\param __a\n"
15863"/// A 128-bit vector of [2 x double].\n"
15864"/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n"
15865"/// converted values. The upper 64 bits are set to zero.\n"
15866"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
15867"_mm_cvttpd_epi32(__m128d __a)\n"
15868"{\n"
15869" return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);\n"
15870"}\n"
15871"\n"
15872"/// Converts the low-order element of a [2 x double] vector into a 32-bit\n"
15873"/// signed integer value, truncating the result when it is inexact.\n"
15874"///\n"
15875"/// \\headerfile <x86intrin.h>\n"
15876"///\n"
15877"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
15878"/// instruction.\n"
15879"///\n"
15880"/// \\param __a\n"
15881"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
15882"/// conversion.\n"
15883"/// \\returns A 32-bit signed integer containing the converted value.\n"
15884"static __inline__ int __DEFAULT_FN_ATTRS\n"
15885"_mm_cvttsd_si32(__m128d __a)\n"
15886"{\n"
15887" return __builtin_ia32_cvttsd2si((__v2df)__a);\n"
15888"}\n"
15889"\n"
15890"/// Converts the two double-precision floating-point elements of a\n"
15891"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15892"/// returned in a 64-bit vector of [2 x i32].\n"
15893"///\n"
15894"/// \\headerfile <x86intrin.h>\n"
15895"///\n"
15896"/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.\n"
15897"///\n"
15898"/// \\param __a\n"
15899"/// A 128-bit vector of [2 x double].\n"
15900"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15901"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15902"_mm_cvtpd_pi32(__m128d __a)\n"
15903"{\n"
15904" return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);\n"
15905"}\n"
15906"\n"
15907"/// Converts the two double-precision floating-point elements of a\n"
15908"/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n"
15909"/// returned in a 64-bit vector of [2 x i32].\n"
15910"///\n"
15911"/// If the result of either conversion is inexact, the result is truncated\n"
15912"/// (rounded towards zero) regardless of the current MXCSR setting.\n"
15913"///\n"
15914"/// \\headerfile <x86intrin.h>\n"
15915"///\n"
15916"/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.\n"
15917"///\n"
15918"/// \\param __a\n"
15919"/// A 128-bit vector of [2 x double].\n"
15920"/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n"
15921"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
15922"_mm_cvttpd_pi32(__m128d __a)\n"
15923"{\n"
15924" return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);\n"
15925"}\n"
15926"\n"
15927"/// Converts the two signed 32-bit integer elements of a 64-bit vector of\n"
15928"/// [2 x i32] into two double-precision floating-point values, returned in a\n"
15929"/// 128-bit vector of [2 x double].\n"
15930"///\n"
15931"/// \\headerfile <x86intrin.h>\n"
15932"///\n"
15933"/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.\n"
15934"///\n"
15935"/// \\param __a\n"
15936"/// A 64-bit vector of [2 x i32].\n"
15937"/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n"
15938"static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX\n"
15939"_mm_cvtpi32_pd(__m64 __a)\n"
15940"{\n"
15941" return __builtin_ia32_cvtpi2pd((__v2si)__a);\n"
15942"}\n"
15943"\n"
15944"/// Returns the low-order element of a 128-bit vector of [2 x double] as\n"
15945"/// a double-precision floating-point value.\n"
15946"///\n"
15947"/// \\headerfile <x86intrin.h>\n"
15948"///\n"
15949"/// This intrinsic has no corresponding instruction.\n"
15950"///\n"
15951"/// \\param __a\n"
15952"/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.\n"
15953"/// \\returns A double-precision floating-point value copied from the lower 64\n"
15954"/// bits of \\a __a.\n"
15955"static __inline__ double __DEFAULT_FN_ATTRS\n"
15956"_mm_cvtsd_f64(__m128d __a)\n"
15957"{\n"
15958" return __a[0];\n"
15959"}\n"
15960"\n"
15961"/// Loads a 128-bit floating-point vector of [2 x double] from an aligned\n"
15962"/// memory location.\n"
15963"///\n"
15964"/// \\headerfile <x86intrin.h>\n"
15965"///\n"
15966"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.\n"
15967"///\n"
15968"/// \\param __dp\n"
15969"/// A pointer to a 128-bit memory location. The address of the memory\n"
15970"/// location has to be 16-byte aligned.\n"
15971"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
15972"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15973"_mm_load_pd(double const *__dp)\n"
15974"{\n"
15975" return *(__m128d*)__dp;\n"
15976"}\n"
15977"\n"
15978"/// Loads a double-precision floating-point value from a specified memory\n"
15979"/// location and duplicates it to both vector elements of a 128-bit vector of\n"
15980"/// [2 x double].\n"
15981"///\n"
15982"/// \\headerfile <x86intrin.h>\n"
15983"///\n"
15984"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.\n"
15985"///\n"
15986"/// \\param __dp\n"
15987"/// A pointer to a memory location containing a double-precision value.\n"
15988"/// \\returns A 128-bit vector of [2 x double] containing the loaded and\n"
15989"/// duplicated values.\n"
15990"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
15991"_mm_load1_pd(double const *__dp)\n"
15992"{\n"
15993" struct __mm_load1_pd_struct {\n"
15994" double __u;\n"
15995" } __attribute__((__packed__, __may_alias__));\n"
15996" double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;\n"
15997" return __extension__ (__m128d){ __u, __u };\n"
15998"}\n"
15999"\n"
16000"#define _mm_load_pd1(dp) _mm_load1_pd(dp)\n"
16001"\n"
16002"/// Loads two double-precision values, in reverse order, from an aligned\n"
16003"/// memory location into a 128-bit vector of [2 x double].\n"
16004"///\n"
16005"/// \\headerfile <x86intrin.h>\n"
16006"///\n"
16007"/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +\n"
16008"/// needed shuffling instructions. In AVX mode, the shuffling may be combined\n"
16009"/// with the \\c VMOVAPD, resulting in only a \\c VPERMILPD instruction.\n"
16010"///\n"
16011"/// \\param __dp\n"
16012"/// A 16-byte aligned pointer to an array of double-precision values to be\n"
16013"/// loaded in reverse order.\n"
16014"/// \\returns A 128-bit vector of [2 x double] containing the reversed loaded\n"
16015"/// values.\n"
16016"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16017"_mm_loadr_pd(double const *__dp)\n"
16018"{\n"
16019" __m128d __u = *(__m128d*)__dp;\n"
16020" return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);\n"
16021"}\n"
16022"\n"
16023"/// Loads a 128-bit floating-point vector of [2 x double] from an\n"
16024"/// unaligned memory location.\n"
16025"///\n"
16026"/// \\headerfile <x86intrin.h>\n"
16027"///\n"
16028"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16029"///\n"
16030"/// \\param __dp\n"
16031"/// A pointer to a 128-bit memory location. The address of the memory\n"
16032"/// location does not have to be aligned.\n"
16033"/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n"
16034"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16035"_mm_loadu_pd(double const *__dp)\n"
16036"{\n"
16037" struct __loadu_pd {\n"
16038" __m128d __v;\n"
16039" } __attribute__((__packed__, __may_alias__));\n"
16040" return ((struct __loadu_pd*)__dp)->__v;\n"
16041"}\n"
16042"\n"
16043"/// Loads a 64-bit integer value to the low element of a 128-bit integer\n"
16044"/// vector and clears the upper element.\n"
16045"///\n"
16046"/// \\headerfile <x86intrin.h>\n"
16047"///\n"
16048"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16049"///\n"
16050"/// \\param __a\n"
16051"/// A pointer to a 64-bit memory location. The address of the memory\n"
16052"/// location does not have to be aligned.\n"
16053"/// \\returns A 128-bit vector of [2 x i64] containing the loaded value.\n"
16054"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16055"_mm_loadu_si64(void const *__a)\n"
16056"{\n"
16057" struct __loadu_si64 {\n"
16058" long long __v;\n"
16059" } __attribute__((__packed__, __may_alias__));\n"
16060" long long __u = ((struct __loadu_si64*)__a)->__v;\n"
16061" return __extension__ (__m128i)(__v2di){__u, 0L};\n"
16062"}\n"
16063"\n"
16064"/// Loads a 64-bit double-precision value to the low element of a\n"
16065"/// 128-bit integer vector and clears the upper element.\n"
16066"///\n"
16067"/// \\headerfile <x86intrin.h>\n"
16068"///\n"
16069"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16070"///\n"
16071"/// \\param __dp\n"
16072"/// A pointer to a memory location containing a double-precision value.\n"
16073"/// The address of the memory location does not have to be aligned.\n"
16074"/// \\returns A 128-bit vector of [2 x double] containing the loaded value.\n"
16075"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16076"_mm_load_sd(double const *__dp)\n"
16077"{\n"
16078" struct __mm_load_sd_struct {\n"
16079" double __u;\n"
16080" } __attribute__((__packed__, __may_alias__));\n"
16081" double __u = ((struct __mm_load_sd_struct*)__dp)->__u;\n"
16082" return __extension__ (__m128d){ __u, 0 };\n"
16083"}\n"
16084"\n"
16085"/// Loads a double-precision value into the high-order bits of a 128-bit\n"
16086"/// vector of [2 x double]. The low-order bits are copied from the low-order\n"
16087"/// bits of the first operand.\n"
16088"///\n"
16089"/// \\headerfile <x86intrin.h>\n"
16090"///\n"
16091"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16092"///\n"
16093"/// \\param __a\n"
16094"/// A 128-bit vector of [2 x double]. \\n\n"
16095"/// Bits [63:0] are written to bits [63:0] of the result.\n"
16096"/// \\param __dp\n"
16097"/// A pointer to a 64-bit memory location containing a double-precision\n"
16098"/// floating-point value that is loaded. The loaded value is written to bits\n"
16099"/// [127:64] of the result. The address of the memory location does not have\n"
16100"/// to be aligned.\n"
16101"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16102"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16103"_mm_loadh_pd(__m128d __a, double const *__dp)\n"
16104"{\n"
16105" struct __mm_loadh_pd_struct {\n"
16106" double __u;\n"
16107" } __attribute__((__packed__, __may_alias__));\n"
16108" double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;\n"
16109" return __extension__ (__m128d){ __a[0], __u };\n"
16110"}\n"
16111"\n"
16112"/// Loads a double-precision value into the low-order bits of a 128-bit\n"
16113"/// vector of [2 x double]. The high-order bits are copied from the\n"
16114"/// high-order bits of the first operand.\n"
16115"///\n"
16116"/// \\headerfile <x86intrin.h>\n"
16117"///\n"
16118"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16119"///\n"
16120"/// \\param __a\n"
16121"/// A 128-bit vector of [2 x double]. \\n\n"
16122"/// Bits [127:64] are written to bits [127:64] of the result.\n"
16123"/// \\param __dp\n"
16124"/// A pointer to a 64-bit memory location containing a double-precision\n"
16125"/// floating-point value that is loaded. The loaded value is written to bits\n"
16126"/// [63:0] of the result. The address of the memory location does not have to\n"
16127"/// be aligned.\n"
16128"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16129"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16130"_mm_loadl_pd(__m128d __a, double const *__dp)\n"
16131"{\n"
16132" struct __mm_loadl_pd_struct {\n"
16133" double __u;\n"
16134" } __attribute__((__packed__, __may_alias__));\n"
16135" double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;\n"
16136" return __extension__ (__m128d){ __u, __a[1] };\n"
16137"}\n"
16138"\n"
16139"/// Constructs a 128-bit floating-point vector of [2 x double] with\n"
16140"/// unspecified content. This could be used as an argument to another\n"
16141"/// intrinsic function where the argument is required but the value is not\n"
16142"/// actually used.\n"
16143"///\n"
16144"/// \\headerfile <x86intrin.h>\n"
16145"///\n"
16146"/// This intrinsic has no corresponding instruction.\n"
16147"///\n"
16148"/// \\returns A 128-bit floating-point vector of [2 x double] with unspecified\n"
16149"/// content.\n"
16150"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16151"_mm_undefined_pd(void)\n"
16152"{\n"
16153" return (__m128d)__builtin_ia32_undef128();\n"
16154"}\n"
16155"\n"
16156"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16157"/// 64 bits of the vector are initialized with the specified double-precision\n"
16158"/// floating-point value. The upper 64 bits are set to zero.\n"
16159"///\n"
16160"/// \\headerfile <x86intrin.h>\n"
16161"///\n"
16162"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
16163"///\n"
16164"/// \\param __w\n"
16165"/// A double-precision floating-point value used to initialize the lower 64\n"
16166"/// bits of the result.\n"
16167"/// \\returns An initialized 128-bit floating-point vector of [2 x double]. The\n"
16168"/// lower 64 bits contain the value of the parameter. The upper 64 bits are\n"
16169"/// set to zero.\n"
16170"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16171"_mm_set_sd(double __w)\n"
16172"{\n"
16173" return __extension__ (__m128d){ __w, 0 };\n"
16174"}\n"
16175"\n"
16176"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16177"/// of the two double-precision floating-point vector elements set to the\n"
16178"/// specified double-precision floating-point value.\n"
16179"///\n"
16180"/// \\headerfile <x86intrin.h>\n"
16181"///\n"
16182"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16183"///\n"
16184"/// \\param __w\n"
16185"/// A double-precision floating-point value used to initialize each vector\n"
16186"/// element of the result.\n"
16187"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16188"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16189"_mm_set1_pd(double __w)\n"
16190"{\n"
16191" return __extension__ (__m128d){ __w, __w };\n"
16192"}\n"
16193"\n"
16194"/// Constructs a 128-bit floating-point vector of [2 x double], with each\n"
16195"/// of the two double-precision floating-point vector elements set to the\n"
16196"/// specified double-precision floating-point value.\n"
16197"///\n"
16198"/// \\headerfile <x86intrin.h>\n"
16199"///\n"
16200"/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n"
16201"///\n"
16202"/// \\param __w\n"
16203"/// A double-precision floating-point value used to initialize each vector\n"
16204"/// element of the result.\n"
16205"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16206"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16207"_mm_set_pd1(double __w)\n"
16208"{\n"
16209" return _mm_set1_pd(__w);\n"
16210"}\n"
16211"\n"
16212"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16213"/// initialized with the specified double-precision floating-point values.\n"
16214"///\n"
16215"/// \\headerfile <x86intrin.h>\n"
16216"///\n"
16217"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16218"///\n"
16219"/// \\param __w\n"
16220"/// A double-precision floating-point value used to initialize the upper 64\n"
16221"/// bits of the result.\n"
16222"/// \\param __x\n"
16223"/// A double-precision floating-point value used to initialize the lower 64\n"
16224"/// bits of the result.\n"
16225"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16226"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16227"_mm_set_pd(double __w, double __x)\n"
16228"{\n"
16229" return __extension__ (__m128d){ __x, __w };\n"
16230"}\n"
16231"\n"
16232"/// Constructs a 128-bit floating-point vector of [2 x double],\n"
16233"/// initialized in reverse order with the specified double-precision\n"
16234"/// floating-point values.\n"
16235"///\n"
16236"/// \\headerfile <x86intrin.h>\n"
16237"///\n"
16238"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
16239"///\n"
16240"/// \\param __w\n"
16241"/// A double-precision floating-point value used to initialize the lower 64\n"
16242"/// bits of the result.\n"
16243"/// \\param __x\n"
16244"/// A double-precision floating-point value used to initialize the upper 64\n"
16245"/// bits of the result.\n"
16246"/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n"
16247"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16248"_mm_setr_pd(double __w, double __x)\n"
16249"{\n"
16250" return __extension__ (__m128d){ __w, __x };\n"
16251"}\n"
16252"\n"
16253"/// Constructs a 128-bit floating-point vector of [2 x double]\n"
16254"/// initialized to zero.\n"
16255"///\n"
16256"/// \\headerfile <x86intrin.h>\n"
16257"///\n"
16258"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
16259"///\n"
16260"/// \\returns An initialized 128-bit floating-point vector of [2 x double] with\n"
16261"/// all elements set to zero.\n"
16262"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16263"_mm_setzero_pd(void)\n"
16264"{\n"
16265" return __extension__ (__m128d){ 0, 0 };\n"
16266"}\n"
16267"\n"
16268"/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n"
16269"/// 64 bits are set to the lower 64 bits of the second parameter. The upper\n"
16270"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
16271"///\n"
16272"/// \\headerfile <x86intrin.h>\n"
16273"///\n"
16274"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
16275"///\n"
16276"/// \\param __a\n"
16277"/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the\n"
16278"/// upper 64 bits of the result.\n"
16279"/// \\param __b\n"
16280"/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the\n"
16281"/// lower 64 bits of the result.\n"
16282"/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n"
16283"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
16284"_mm_move_sd(__m128d __a, __m128d __b)\n"
16285"{\n"
16286" __a[0] = __b[0];\n"
16287" return __a;\n"
16288"}\n"
16289"\n"
16290"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16291"/// memory location.\n"
16292"///\n"
16293"/// \\headerfile <x86intrin.h>\n"
16294"///\n"
16295"/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n"
16296"///\n"
16297"/// \\param __dp\n"
16298"/// A pointer to a 64-bit memory location.\n"
16299"/// \\param __a\n"
16300"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16301"static __inline__ void __DEFAULT_FN_ATTRS\n"
16302"_mm_store_sd(double *__dp, __m128d __a)\n"
16303"{\n"
16304" struct __mm_store_sd_struct {\n"
16305" double __u;\n"
16306" } __attribute__((__packed__, __may_alias__));\n"
16307" ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];\n"
16308"}\n"
16309"\n"
16310"/// Moves packed double-precision values from a 128-bit vector of\n"
16311"/// [2 x double] to a memory location.\n"
16312"///\n"
16313"/// \\headerfile <x86intrin.h>\n"
16314"///\n"
16315"/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.\n"
16316"///\n"
16317"/// \\param __dp\n"
16318"/// A pointer to an aligned memory location that can store two\n"
16319"/// double-precision values.\n"
16320"/// \\param __a\n"
16321"/// A packed 128-bit vector of [2 x double] containing the values to be\n"
16322"/// moved.\n"
16323"static __inline__ void __DEFAULT_FN_ATTRS\n"
16324"_mm_store_pd(double *__dp, __m128d __a)\n"
16325"{\n"
16326" *(__m128d*)__dp = __a;\n"
16327"}\n"
16328"\n"
16329"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16330"/// the upper and lower 64 bits of a memory location.\n"
16331"///\n"
16332"/// \\headerfile <x86intrin.h>\n"
16333"///\n"
16334"/// This intrinsic corresponds to the\n"
16335"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16336"///\n"
16337"/// \\param __dp\n"
16338"/// A pointer to a memory location that can store two double-precision\n"
16339"/// values.\n"
16340"/// \\param __a\n"
16341"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16342"/// of the values in \\a __dp.\n"
16343"static __inline__ void __DEFAULT_FN_ATTRS\n"
16344"_mm_store1_pd(double *__dp, __m128d __a)\n"
16345"{\n"
16346" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
16347" _mm_store_pd(__dp, __a);\n"
16348"}\n"
16349"\n"
16350"/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n"
16351"/// the upper and lower 64 bits of a memory location.\n"
16352"///\n"
16353"/// \\headerfile <x86intrin.h>\n"
16354"///\n"
16355"/// This intrinsic corresponds to the\n"
16356"/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n"
16357"///\n"
16358"/// \\param __dp\n"
16359"/// A pointer to a memory location that can store two double-precision\n"
16360"/// values.\n"
16361"/// \\param __a\n"
16362"/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n"
16363"/// of the values in \\a __dp.\n"
16364"static __inline__ void __DEFAULT_FN_ATTRS\n"
16365"_mm_store_pd1(double *__dp, __m128d __a)\n"
16366"{\n"
16367" _mm_store1_pd(__dp, __a);\n"
16368"}\n"
16369"\n"
16370"/// Stores a 128-bit vector of [2 x double] into an unaligned memory\n"
16371"/// location.\n"
16372"///\n"
16373"/// \\headerfile <x86intrin.h>\n"
16374"///\n"
16375"/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n"
16376"///\n"
16377"/// \\param __dp\n"
16378"/// A pointer to a 128-bit memory location. The address of the memory\n"
16379"/// location does not have to be aligned.\n"
16380"/// \\param __a\n"
16381"/// A 128-bit vector of [2 x double] containing the values to be stored.\n"
16382"static __inline__ void __DEFAULT_FN_ATTRS\n"
16383"_mm_storeu_pd(double *__dp, __m128d __a)\n"
16384"{\n"
16385" struct __storeu_pd {\n"
16386" __m128d __v;\n"
16387" } __attribute__((__packed__, __may_alias__));\n"
16388" ((struct __storeu_pd*)__dp)->__v = __a;\n"
16389"}\n"
16390"\n"
16391"/// Stores two double-precision values, in reverse order, from a 128-bit\n"
16392"/// vector of [2 x double] to a 16-byte aligned memory location.\n"
16393"///\n"
16394"/// \\headerfile <x86intrin.h>\n"
16395"///\n"
16396"/// This intrinsic corresponds to a shuffling instruction followed by a\n"
16397"/// <c> VMOVAPD / MOVAPD </c> instruction.\n"
16398"///\n"
16399"/// \\param __dp\n"
16400"/// A pointer to a 16-byte aligned memory location that can store two\n"
16401"/// double-precision values.\n"
16402"/// \\param __a\n"
16403"/// A 128-bit vector of [2 x double] containing the values to be reversed and\n"
16404"/// stored.\n"
16405"static __inline__ void __DEFAULT_FN_ATTRS\n"
16406"_mm_storer_pd(double *__dp, __m128d __a)\n"
16407"{\n"
16408" __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);\n"
16409" *(__m128d *)__dp = __a;\n"
16410"}\n"
16411"\n"
16412"/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a\n"
16413"/// memory location.\n"
16414"///\n"
16415"/// \\headerfile <x86intrin.h>\n"
16416"///\n"
16417"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
16418"///\n"
16419"/// \\param __dp\n"
16420"/// A pointer to a 64-bit memory location.\n"
16421"/// \\param __a\n"
16422"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16423"static __inline__ void __DEFAULT_FN_ATTRS\n"
16424"_mm_storeh_pd(double *__dp, __m128d __a)\n"
16425"{\n"
16426" struct __mm_storeh_pd_struct {\n"
16427" double __u;\n"
16428" } __attribute__((__packed__, __may_alias__));\n"
16429" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];\n"
16430"}\n"
16431"\n"
16432"/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n"
16433"/// memory location.\n"
16434"///\n"
16435"/// \\headerfile <x86intrin.h>\n"
16436"///\n"
16437"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
16438"///\n"
16439"/// \\param __dp\n"
16440"/// A pointer to a 64-bit memory location.\n"
16441"/// \\param __a\n"
16442"/// A 128-bit vector of [2 x double] containing the value to be stored.\n"
16443"static __inline__ void __DEFAULT_FN_ATTRS\n"
16444"_mm_storel_pd(double *__dp, __m128d __a)\n"
16445"{\n"
16446" struct __mm_storeh_pd_struct {\n"
16447" double __u;\n"
16448" } __attribute__((__packed__, __may_alias__));\n"
16449" ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];\n"
16450"}\n"
16451"\n"
16452"/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],\n"
16453"/// saving the lower 8 bits of each sum in the corresponding element of a\n"
16454"/// 128-bit result vector of [16 x i8].\n"
16455"///\n"
16456"/// The integer elements of both parameters can be either signed or unsigned.\n"
16457"///\n"
16458"/// \\headerfile <x86intrin.h>\n"
16459"///\n"
16460"/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.\n"
16461"///\n"
16462"/// \\param __a\n"
16463"/// A 128-bit vector of [16 x i8].\n"
16464"/// \\param __b\n"
16465"/// A 128-bit vector of [16 x i8].\n"
16466"/// \\returns A 128-bit vector of [16 x i8] containing the sums of both\n"
16467"/// parameters.\n"
16468"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16469"_mm_add_epi8(__m128i __a, __m128i __b)\n"
16470"{\n"
16471" return (__m128i)((__v16qu)__a + (__v16qu)__b);\n"
16472"}\n"
16473"\n"
16474"/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],\n"
16475"/// saving the lower 16 bits of each sum in the corresponding element of a\n"
16476"/// 128-bit result vector of [8 x i16].\n"
16477"///\n"
16478"/// The integer elements of both parameters can be either signed or unsigned.\n"
16479"///\n"
16480"/// \\headerfile <x86intrin.h>\n"
16481"///\n"
16482"/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.\n"
16483"///\n"
16484"/// \\param __a\n"
16485"/// A 128-bit vector of [8 x i16].\n"
16486"/// \\param __b\n"
16487"/// A 128-bit vector of [8 x i16].\n"
16488"/// \\returns A 128-bit vector of [8 x i16] containing the sums of both\n"
16489"/// parameters.\n"
16490"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16491"_mm_add_epi16(__m128i __a, __m128i __b)\n"
16492"{\n"
16493" return (__m128i)((__v8hu)__a + (__v8hu)__b);\n"
16494"}\n"
16495"\n"
16496"/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],\n"
16497"/// saving the lower 32 bits of each sum in the corresponding element of a\n"
16498"/// 128-bit result vector of [4 x i32].\n"
16499"///\n"
16500"/// The integer elements of both parameters can be either signed or unsigned.\n"
16501"///\n"
16502"/// \\headerfile <x86intrin.h>\n"
16503"///\n"
16504"/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.\n"
16505"///\n"
16506"/// \\param __a\n"
16507"/// A 128-bit vector of [4 x i32].\n"
16508"/// \\param __b\n"
16509"/// A 128-bit vector of [4 x i32].\n"
16510"/// \\returns A 128-bit vector of [4 x i32] containing the sums of both\n"
16511"/// parameters.\n"
16512"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16513"_mm_add_epi32(__m128i __a, __m128i __b)\n"
16514"{\n"
16515" return (__m128i)((__v4su)__a + (__v4su)__b);\n"
16516"}\n"
16517"\n"
16518"/// Adds two signed or unsigned 64-bit integer values, returning the\n"
16519"/// lower 64 bits of the sum.\n"
16520"///\n"
16521"/// \\headerfile <x86intrin.h>\n"
16522"///\n"
16523"/// This intrinsic corresponds to the <c> PADDQ </c> instruction.\n"
16524"///\n"
16525"/// \\param __a\n"
16526"/// A 64-bit integer.\n"
16527"/// \\param __b\n"
16528"/// A 64-bit integer.\n"
16529"/// \\returns A 64-bit integer containing the sum of both parameters.\n"
16530"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16531"_mm_add_si64(__m64 __a, __m64 __b)\n"
16532"{\n"
16533" return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);\n"
16534"}\n"
16535"\n"
16536"/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],\n"
16537"/// saving the lower 64 bits of each sum in the corresponding element of a\n"
16538"/// 128-bit result vector of [2 x i64].\n"
16539"///\n"
16540"/// The integer elements of both parameters can be either signed or unsigned.\n"
16541"///\n"
16542"/// \\headerfile <x86intrin.h>\n"
16543"///\n"
16544"/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.\n"
16545"///\n"
16546"/// \\param __a\n"
16547"/// A 128-bit vector of [2 x i64].\n"
16548"/// \\param __b\n"
16549"/// A 128-bit vector of [2 x i64].\n"
16550"/// \\returns A 128-bit vector of [2 x i64] containing the sums of both\n"
16551"/// parameters.\n"
16552"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16553"_mm_add_epi64(__m128i __a, __m128i __b)\n"
16554"{\n"
16555" return (__m128i)((__v2du)__a + (__v2du)__b);\n"
16556"}\n"
16557"\n"
16558"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16559"/// signed [16 x i8] vectors, saving each sum in the corresponding element of\n"
16560"/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are\n"
16561"/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.\n"
16562"///\n"
16563"/// \\headerfile <x86intrin.h>\n"
16564"///\n"
16565"/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.\n"
16566"///\n"
16567"/// \\param __a\n"
16568"/// A 128-bit signed [16 x i8] vector.\n"
16569"/// \\param __b\n"
16570"/// A 128-bit signed [16 x i8] vector.\n"
16571"/// \\returns A 128-bit signed [16 x i8] vector containing the saturated sums of\n"
16572"/// both parameters.\n"
16573"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16574"_mm_adds_epi8(__m128i __a, __m128i __b)\n"
16575"{\n"
16576" return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);\n"
16577"}\n"
16578"\n"
16579"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16580"/// signed [8 x i16] vectors, saving each sum in the corresponding element of\n"
16581"/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF\n"
16582"/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
16583"/// 0x8000.\n"
16584"///\n"
16585"/// \\headerfile <x86intrin.h>\n"
16586"///\n"
16587"/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.\n"
16588"///\n"
16589"/// \\param __a\n"
16590"/// A 128-bit signed [8 x i16] vector.\n"
16591"/// \\param __b\n"
16592"/// A 128-bit signed [8 x i16] vector.\n"
16593"/// \\returns A 128-bit signed [8 x i16] vector containing the saturated sums of\n"
16594"/// both parameters.\n"
16595"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16596"_mm_adds_epi16(__m128i __a, __m128i __b)\n"
16597"{\n"
16598" return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);\n"
16599"}\n"
16600"\n"
16601"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16602"/// unsigned [16 x i8] vectors, saving each sum in the corresponding element\n"
16603"/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF\n"
16604"/// are saturated to 0xFF. Negative sums are saturated to 0x00.\n"
16605"///\n"
16606"/// \\headerfile <x86intrin.h>\n"
16607"///\n"
16608"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16609"///\n"
16610"/// \\param __a\n"
16611"/// A 128-bit unsigned [16 x i8] vector.\n"
16612"/// \\param __b\n"
16613"/// A 128-bit unsigned [16 x i8] vector.\n"
16614"/// \\returns A 128-bit unsigned [16 x i8] vector containing the saturated sums\n"
16615"/// of both parameters.\n"
16616"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16617"_mm_adds_epu8(__m128i __a, __m128i __b)\n"
16618"{\n"
16619" return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);\n"
16620"}\n"
16621"\n"
16622"/// Adds, with saturation, the corresponding elements of two 128-bit\n"
16623"/// unsigned [8 x i16] vectors, saving each sum in the corresponding element\n"
16624"/// of a 128-bit result vector of [8 x i16]. Positive sums greater than\n"
16625"/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.\n"
16626"///\n"
16627"/// \\headerfile <x86intrin.h>\n"
16628"///\n"
16629"/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n"
16630"///\n"
16631"/// \\param __a\n"
16632"/// A 128-bit unsigned [8 x i16] vector.\n"
16633"/// \\param __b\n"
16634"/// A 128-bit unsigned [8 x i16] vector.\n"
16635"/// \\returns A 128-bit unsigned [8 x i16] vector containing the saturated sums\n"
16636"/// of both parameters.\n"
16637"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16638"_mm_adds_epu16(__m128i __a, __m128i __b)\n"
16639"{\n"
16640" return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);\n"
16641"}\n"
16642"\n"
16643"/// Computes the rounded avarages of corresponding elements of two\n"
16644"/// 128-bit unsigned [16 x i8] vectors, saving each result in the\n"
16645"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16646"///\n"
16647"/// \\headerfile <x86intrin.h>\n"
16648"///\n"
16649"/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.\n"
16650"///\n"
16651"/// \\param __a\n"
16652"/// A 128-bit unsigned [16 x i8] vector.\n"
16653"/// \\param __b\n"
16654"/// A 128-bit unsigned [16 x i8] vector.\n"
16655"/// \\returns A 128-bit unsigned [16 x i8] vector containing the rounded\n"
16656"/// averages of both parameters.\n"
16657"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16658"_mm_avg_epu8(__m128i __a, __m128i __b)\n"
16659"{\n"
16660" typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n"
16661" return (__m128i)__builtin_convertvector(\n"
16662" ((__builtin_convertvector((__v16qu)__a, __v16hu) +\n"
16663" __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)\n"
16664" >> 1, __v16qu);\n"
16665"}\n"
16666"\n"
16667"/// Computes the rounded avarages of corresponding elements of two\n"
16668"/// 128-bit unsigned [8 x i16] vectors, saving each result in the\n"
16669"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16670"///\n"
16671"/// \\headerfile <x86intrin.h>\n"
16672"///\n"
16673"/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.\n"
16674"///\n"
16675"/// \\param __a\n"
16676"/// A 128-bit unsigned [8 x i16] vector.\n"
16677"/// \\param __b\n"
16678"/// A 128-bit unsigned [8 x i16] vector.\n"
16679"/// \\returns A 128-bit unsigned [8 x i16] vector containing the rounded\n"
16680"/// averages of both parameters.\n"
16681"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16682"_mm_avg_epu16(__m128i __a, __m128i __b)\n"
16683"{\n"
16684" typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n"
16685" return (__m128i)__builtin_convertvector(\n"
16686" ((__builtin_convertvector((__v8hu)__a, __v8su) +\n"
16687" __builtin_convertvector((__v8hu)__b, __v8su)) + 1)\n"
16688" >> 1, __v8hu);\n"
16689"}\n"
16690"\n"
16691"/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]\n"
16692"/// vectors, producing eight intermediate 32-bit signed integer products, and\n"
16693"/// adds the consecutive pairs of 32-bit products to form a 128-bit signed\n"
16694"/// [4 x i32] vector.\n"
16695"///\n"
16696"/// For example, bits [15:0] of both parameters are multiplied producing a\n"
16697"/// 32-bit product, bits [31:16] of both parameters are multiplied producing\n"
16698"/// a 32-bit product, and the sum of those two products becomes bits [31:0]\n"
16699"/// of the result.\n"
16700"///\n"
16701"/// \\headerfile <x86intrin.h>\n"
16702"///\n"
16703"/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.\n"
16704"///\n"
16705"/// \\param __a\n"
16706"/// A 128-bit signed [8 x i16] vector.\n"
16707"/// \\param __b\n"
16708"/// A 128-bit signed [8 x i16] vector.\n"
16709"/// \\returns A 128-bit signed [4 x i32] vector containing the sums of products\n"
16710"/// of both parameters.\n"
16711"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16712"_mm_madd_epi16(__m128i __a, __m128i __b)\n"
16713"{\n"
16714" return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);\n"
16715"}\n"
16716"\n"
16717"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16718"/// vectors, saving the greater value from each comparison in the\n"
16719"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16720"///\n"
16721"/// \\headerfile <x86intrin.h>\n"
16722"///\n"
16723"/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.\n"
16724"///\n"
16725"/// \\param __a\n"
16726"/// A 128-bit signed [8 x i16] vector.\n"
16727"/// \\param __b\n"
16728"/// A 128-bit signed [8 x i16] vector.\n"
16729"/// \\returns A 128-bit signed [8 x i16] vector containing the greater value of\n"
16730"/// each comparison.\n"
16731"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16732"_mm_max_epi16(__m128i __a, __m128i __b)\n"
16733"{\n"
16734" return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);\n"
16735"}\n"
16736"\n"
16737"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16738"/// vectors, saving the greater value from each comparison in the\n"
16739"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16740"///\n"
16741"/// \\headerfile <x86intrin.h>\n"
16742"///\n"
16743"/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.\n"
16744"///\n"
16745"/// \\param __a\n"
16746"/// A 128-bit unsigned [16 x i8] vector.\n"
16747"/// \\param __b\n"
16748"/// A 128-bit unsigned [16 x i8] vector.\n"
16749"/// \\returns A 128-bit unsigned [16 x i8] vector containing the greater value of\n"
16750"/// each comparison.\n"
16751"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16752"_mm_max_epu8(__m128i __a, __m128i __b)\n"
16753"{\n"
16754" return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);\n"
16755"}\n"
16756"\n"
16757"/// Compares corresponding elements of two 128-bit signed [8 x i16]\n"
16758"/// vectors, saving the smaller value from each comparison in the\n"
16759"/// corresponding element of a 128-bit result vector of [8 x i16].\n"
16760"///\n"
16761"/// \\headerfile <x86intrin.h>\n"
16762"///\n"
16763"/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.\n"
16764"///\n"
16765"/// \\param __a\n"
16766"/// A 128-bit signed [8 x i16] vector.\n"
16767"/// \\param __b\n"
16768"/// A 128-bit signed [8 x i16] vector.\n"
16769"/// \\returns A 128-bit signed [8 x i16] vector containing the smaller value of\n"
16770"/// each comparison.\n"
16771"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16772"_mm_min_epi16(__m128i __a, __m128i __b)\n"
16773"{\n"
16774" return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);\n"
16775"}\n"
16776"\n"
16777"/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n"
16778"/// vectors, saving the smaller value from each comparison in the\n"
16779"/// corresponding element of a 128-bit result vector of [16 x i8].\n"
16780"///\n"
16781"/// \\headerfile <x86intrin.h>\n"
16782"///\n"
16783"/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.\n"
16784"///\n"
16785"/// \\param __a\n"
16786"/// A 128-bit unsigned [16 x i8] vector.\n"
16787"/// \\param __b\n"
16788"/// A 128-bit unsigned [16 x i8] vector.\n"
16789"/// \\returns A 128-bit unsigned [16 x i8] vector containing the smaller value of\n"
16790"/// each comparison.\n"
16791"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16792"_mm_min_epu8(__m128i __a, __m128i __b)\n"
16793"{\n"
16794" return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);\n"
16795"}\n"
16796"\n"
16797"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16798"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16799"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16800"///\n"
16801"/// \\headerfile <x86intrin.h>\n"
16802"///\n"
16803"/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.\n"
16804"///\n"
16805"/// \\param __a\n"
16806"/// A 128-bit signed [8 x i16] vector.\n"
16807"/// \\param __b\n"
16808"/// A 128-bit signed [8 x i16] vector.\n"
16809"/// \\returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of\n"
16810"/// each of the eight 32-bit products.\n"
16811"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16812"_mm_mulhi_epi16(__m128i __a, __m128i __b)\n"
16813"{\n"
16814" return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);\n"
16815"}\n"
16816"\n"
16817"/// Multiplies the corresponding elements of two unsigned [8 x i16]\n"
16818"/// vectors, saving the upper 16 bits of each 32-bit product in the\n"
16819"/// corresponding element of a 128-bit unsigned [8 x i16] result vector.\n"
16820"///\n"
16821"/// \\headerfile <x86intrin.h>\n"
16822"///\n"
16823"/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.\n"
16824"///\n"
16825"/// \\param __a\n"
16826"/// A 128-bit unsigned [8 x i16] vector.\n"
16827"/// \\param __b\n"
16828"/// A 128-bit unsigned [8 x i16] vector.\n"
16829"/// \\returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits\n"
16830"/// of each of the eight 32-bit products.\n"
16831"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16832"_mm_mulhi_epu16(__m128i __a, __m128i __b)\n"
16833"{\n"
16834" return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);\n"
16835"}\n"
16836"\n"
16837"/// Multiplies the corresponding elements of two signed [8 x i16]\n"
16838"/// vectors, saving the lower 16 bits of each 32-bit product in the\n"
16839"/// corresponding element of a 128-bit signed [8 x i16] result vector.\n"
16840"///\n"
16841"/// \\headerfile <x86intrin.h>\n"
16842"///\n"
16843"/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.\n"
16844"///\n"
16845"/// \\param __a\n"
16846"/// A 128-bit signed [8 x i16] vector.\n"
16847"/// \\param __b\n"
16848"/// A 128-bit signed [8 x i16] vector.\n"
16849"/// \\returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of\n"
16850"/// each of the eight 32-bit products.\n"
16851"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16852"_mm_mullo_epi16(__m128i __a, __m128i __b)\n"
16853"{\n"
16854" return (__m128i)((__v8hu)__a * (__v8hu)__b);\n"
16855"}\n"
16856"\n"
16857"/// Multiplies 32-bit unsigned integer values contained in the lower bits\n"
16858"/// of the two 64-bit integer vectors and returns the 64-bit unsigned\n"
16859"/// product.\n"
16860"///\n"
16861"/// \\headerfile <x86intrin.h>\n"
16862"///\n"
16863"/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.\n"
16864"///\n"
16865"/// \\param __a\n"
16866"/// A 64-bit integer containing one of the source operands.\n"
16867"/// \\param __b\n"
16868"/// A 64-bit integer containing one of the source operands.\n"
16869"/// \\returns A 64-bit integer vector containing the product of both operands.\n"
16870"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16871"_mm_mul_su32(__m64 __a, __m64 __b)\n"
16872"{\n"
16873" return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);\n"
16874"}\n"
16875"\n"
16876"/// Multiplies 32-bit unsigned integer values contained in the lower\n"
16877"/// bits of the corresponding elements of two [2 x i64] vectors, and returns\n"
16878"/// the 64-bit products in the corresponding elements of a [2 x i64] vector.\n"
16879"///\n"
16880"/// \\headerfile <x86intrin.h>\n"
16881"///\n"
16882"/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.\n"
16883"///\n"
16884"/// \\param __a\n"
16885"/// A [2 x i64] vector containing one of the source operands.\n"
16886"/// \\param __b\n"
16887"/// A [2 x i64] vector containing one of the source operands.\n"
16888"/// \\returns A [2 x i64] vector containing the product of both operands.\n"
16889"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16890"_mm_mul_epu32(__m128i __a, __m128i __b)\n"
16891"{\n"
16892" return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);\n"
16893"}\n"
16894"\n"
16895"/// Computes the absolute differences of corresponding 8-bit integer\n"
16896"/// values in two 128-bit vectors. Sums the first 8 absolute differences, and\n"
16897"/// separately sums the second 8 absolute differences. Packs these two\n"
16898"/// unsigned 16-bit integer sums into the upper and lower elements of a\n"
16899"/// [2 x i64] vector.\n"
16900"///\n"
16901"/// \\headerfile <x86intrin.h>\n"
16902"///\n"
16903"/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.\n"
16904"///\n"
16905"/// \\param __a\n"
16906"/// A 128-bit integer vector containing one of the source operands.\n"
16907"/// \\param __b\n"
16908"/// A 128-bit integer vector containing one of the source operands.\n"
16909"/// \\returns A [2 x i64] vector containing the sums of the sets of absolute\n"
16910"/// differences between both operands.\n"
16911"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16912"_mm_sad_epu8(__m128i __a, __m128i __b)\n"
16913"{\n"
16914" return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);\n"
16915"}\n"
16916"\n"
16917"/// Subtracts the corresponding 8-bit integer values in the operands.\n"
16918"///\n"
16919"/// \\headerfile <x86intrin.h>\n"
16920"///\n"
16921"/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.\n"
16922"///\n"
16923"/// \\param __a\n"
16924"/// A 128-bit integer vector containing the minuends.\n"
16925"/// \\param __b\n"
16926"/// A 128-bit integer vector containing the subtrahends.\n"
16927"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16928"/// in the operands.\n"
16929"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16930"_mm_sub_epi8(__m128i __a, __m128i __b)\n"
16931"{\n"
16932" return (__m128i)((__v16qu)__a - (__v16qu)__b);\n"
16933"}\n"
16934"\n"
16935"/// Subtracts the corresponding 16-bit integer values in the operands.\n"
16936"///\n"
16937"/// \\headerfile <x86intrin.h>\n"
16938"///\n"
16939"/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.\n"
16940"///\n"
16941"/// \\param __a\n"
16942"/// A 128-bit integer vector containing the minuends.\n"
16943"/// \\param __b\n"
16944"/// A 128-bit integer vector containing the subtrahends.\n"
16945"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16946"/// in the operands.\n"
16947"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16948"_mm_sub_epi16(__m128i __a, __m128i __b)\n"
16949"{\n"
16950" return (__m128i)((__v8hu)__a - (__v8hu)__b);\n"
16951"}\n"
16952"\n"
16953"/// Subtracts the corresponding 32-bit integer values in the operands.\n"
16954"///\n"
16955"/// \\headerfile <x86intrin.h>\n"
16956"///\n"
16957"/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.\n"
16958"///\n"
16959"/// \\param __a\n"
16960"/// A 128-bit integer vector containing the minuends.\n"
16961"/// \\param __b\n"
16962"/// A 128-bit integer vector containing the subtrahends.\n"
16963"/// \\returns A 128-bit integer vector containing the differences of the values\n"
16964"/// in the operands.\n"
16965"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
16966"_mm_sub_epi32(__m128i __a, __m128i __b)\n"
16967"{\n"
16968" return (__m128i)((__v4su)__a - (__v4su)__b);\n"
16969"}\n"
16970"\n"
16971"/// Subtracts signed or unsigned 64-bit integer values and writes the\n"
16972"/// difference to the corresponding bits in the destination.\n"
16973"///\n"
16974"/// \\headerfile <x86intrin.h>\n"
16975"///\n"
16976"/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.\n"
16977"///\n"
16978"/// \\param __a\n"
16979"/// A 64-bit integer vector containing the minuend.\n"
16980"/// \\param __b\n"
16981"/// A 64-bit integer vector containing the subtrahend.\n"
16982"/// \\returns A 64-bit integer vector containing the difference of the values in\n"
16983"/// the operands.\n"
16984"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
16985"_mm_sub_si64(__m64 __a, __m64 __b)\n"
16986"{\n"
16987" return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);\n"
16988"}\n"
16989"\n"
16990"/// Subtracts the corresponding elements of two [2 x i64] vectors.\n"
16991"///\n"
16992"/// \\headerfile <x86intrin.h>\n"
16993"///\n"
16994"/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.\n"
16995"///\n"
16996"/// \\param __a\n"
16997"/// A 128-bit integer vector containing the minuends.\n"
16998"/// \\param __b\n"
16999"/// A 128-bit integer vector containing the subtrahends.\n"
17000"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17001"/// in the operands.\n"
17002"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17003"_mm_sub_epi64(__m128i __a, __m128i __b)\n"
17004"{\n"
17005" return (__m128i)((__v2du)__a - (__v2du)__b);\n"
17006"}\n"
17007"\n"
17008"/// Subtracts corresponding 8-bit signed integer values in the input and\n"
17009"/// returns the differences in the corresponding bytes in the destination.\n"
17010"/// Differences greater than 0x7F are saturated to 0x7F, and differences less\n"
17011"/// than 0x80 are saturated to 0x80.\n"
17012"///\n"
17013"/// \\headerfile <x86intrin.h>\n"
17014"///\n"
17015"/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.\n"
17016"///\n"
17017"/// \\param __a\n"
17018"/// A 128-bit integer vector containing the minuends.\n"
17019"/// \\param __b\n"
17020"/// A 128-bit integer vector containing the subtrahends.\n"
17021"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17022"/// in the operands.\n"
17023"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17024"_mm_subs_epi8(__m128i __a, __m128i __b)\n"
17025"{\n"
17026" return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);\n"
17027"}\n"
17028"\n"
17029"/// Subtracts corresponding 16-bit signed integer values in the input and\n"
17030"/// returns the differences in the corresponding bytes in the destination.\n"
17031"/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less\n"
17032"/// than 0x8000 are saturated to 0x8000.\n"
17033"///\n"
17034"/// \\headerfile <x86intrin.h>\n"
17035"///\n"
17036"/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.\n"
17037"///\n"
17038"/// \\param __a\n"
17039"/// A 128-bit integer vector containing the minuends.\n"
17040"/// \\param __b\n"
17041"/// A 128-bit integer vector containing the subtrahends.\n"
17042"/// \\returns A 128-bit integer vector containing the differences of the values\n"
17043"/// in the operands.\n"
17044"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17045"_mm_subs_epi16(__m128i __a, __m128i __b)\n"
17046"{\n"
17047" return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);\n"
17048"}\n"
17049"\n"
17050"/// Subtracts corresponding 8-bit unsigned integer values in the input\n"
17051"/// and returns the differences in the corresponding bytes in the\n"
17052"/// destination. Differences less than 0x00 are saturated to 0x00.\n"
17053"///\n"
17054"/// \\headerfile <x86intrin.h>\n"
17055"///\n"
17056"/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.\n"
17057"///\n"
17058"/// \\param __a\n"
17059"/// A 128-bit integer vector containing the minuends.\n"
17060"/// \\param __b\n"
17061"/// A 128-bit integer vector containing the subtrahends.\n"
17062"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17063"/// differences of the values in the operands.\n"
17064"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17065"_mm_subs_epu8(__m128i __a, __m128i __b)\n"
17066"{\n"
17067" return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);\n"
17068"}\n"
17069"\n"
17070"/// Subtracts corresponding 16-bit unsigned integer values in the input\n"
17071"/// and returns the differences in the corresponding bytes in the\n"
17072"/// destination. Differences less than 0x0000 are saturated to 0x0000.\n"
17073"///\n"
17074"/// \\headerfile <x86intrin.h>\n"
17075"///\n"
17076"/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.\n"
17077"///\n"
17078"/// \\param __a\n"
17079"/// A 128-bit integer vector containing the minuends.\n"
17080"/// \\param __b\n"
17081"/// A 128-bit integer vector containing the subtrahends.\n"
17082"/// \\returns A 128-bit integer vector containing the unsigned integer\n"
17083"/// differences of the values in the operands.\n"
17084"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17085"_mm_subs_epu16(__m128i __a, __m128i __b)\n"
17086"{\n"
17087" return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);\n"
17088"}\n"
17089"\n"
17090"/// Performs a bitwise AND of two 128-bit integer vectors.\n"
17091"///\n"
17092"/// \\headerfile <x86intrin.h>\n"
17093"///\n"
17094"/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n"
17095"///\n"
17096"/// \\param __a\n"
17097"/// A 128-bit integer vector containing one of the source operands.\n"
17098"/// \\param __b\n"
17099"/// A 128-bit integer vector containing one of the source operands.\n"
17100"/// \\returns A 128-bit integer vector containing the bitwise AND of the values\n"
17101"/// in both operands.\n"
17102"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17103"_mm_and_si128(__m128i __a, __m128i __b)\n"
17104"{\n"
17105" return (__m128i)((__v2du)__a & (__v2du)__b);\n"
17106"}\n"
17107"\n"
17108"/// Performs a bitwise AND of two 128-bit integer vectors, using the\n"
17109"/// one's complement of the values contained in the first source operand.\n"
17110"///\n"
17111"/// \\headerfile <x86intrin.h>\n"
17112"///\n"
17113"/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n"
17114"///\n"
17115"/// \\param __a\n"
17116"/// A 128-bit vector containing the left source operand. The one's complement\n"
17117"/// of this value is used in the bitwise AND.\n"
17118"/// \\param __b\n"
17119"/// A 128-bit vector containing the right source operand.\n"
17120"/// \\returns A 128-bit integer vector containing the bitwise AND of the one's\n"
17121"/// complement of the first operand and the values in the second operand.\n"
17122"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17123"_mm_andnot_si128(__m128i __a, __m128i __b)\n"
17124"{\n"
17125" return (__m128i)(~(__v2du)__a & (__v2du)__b);\n"
17126"}\n"
17127"/// Performs a bitwise OR of two 128-bit integer vectors.\n"
17128"///\n"
17129"/// \\headerfile <x86intrin.h>\n"
17130"///\n"
17131"/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n"
17132"///\n"
17133"/// \\param __a\n"
17134"/// A 128-bit integer vector containing one of the source operands.\n"
17135"/// \\param __b\n"
17136"/// A 128-bit integer vector containing one of the source operands.\n"
17137"/// \\returns A 128-bit integer vector containing the bitwise OR of the values\n"
17138"/// in both operands.\n"
17139"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17140"_mm_or_si128(__m128i __a, __m128i __b)\n"
17141"{\n"
17142" return (__m128i)((__v2du)__a | (__v2du)__b);\n"
17143"}\n"
17144"\n"
17145"/// Performs a bitwise exclusive OR of two 128-bit integer vectors.\n"
17146"///\n"
17147"/// \\headerfile <x86intrin.h>\n"
17148"///\n"
17149"/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n"
17150"///\n"
17151"/// \\param __a\n"
17152"/// A 128-bit integer vector containing one of the source operands.\n"
17153"/// \\param __b\n"
17154"/// A 128-bit integer vector containing one of the source operands.\n"
17155"/// \\returns A 128-bit integer vector containing the bitwise exclusive OR of the\n"
17156"/// values in both operands.\n"
17157"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17158"_mm_xor_si128(__m128i __a, __m128i __b)\n"
17159"{\n"
17160" return (__m128i)((__v2du)__a ^ (__v2du)__b);\n"
17161"}\n"
17162"\n"
17163"/// Left-shifts the 128-bit integer vector operand by the specified\n"
17164"/// number of bytes. Low-order bits are cleared.\n"
17165"///\n"
17166"/// \\headerfile <x86intrin.h>\n"
17167"///\n"
17168"/// \\code\n"
17169"/// __m128i _mm_slli_si128(__m128i a, const int imm);\n"
17170"/// \\endcode\n"
17171"///\n"
17172"/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.\n"
17173"///\n"
17174"/// \\param a\n"
17175"/// A 128-bit integer vector containing the source operand.\n"
17176"/// \\param imm\n"
17177"/// An immediate value specifying the number of bytes to left-shift operand\n"
17178"/// \\a a.\n"
17179"/// \\returns A 128-bit integer vector containing the left-shifted value.\n"
17180"#define _mm_slli_si128(a, imm) \\\n"
17181" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17182"\n"
17183"#define _mm_bslli_si128(a, imm) \\\n"
17184" (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17185"\n"
17186"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17187"/// by the specified number of bits. Low-order bits are cleared.\n"
17188"///\n"
17189"/// \\headerfile <x86intrin.h>\n"
17190"///\n"
17191"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17192"///\n"
17193"/// \\param __a\n"
17194"/// A 128-bit integer vector containing the source operand.\n"
17195"/// \\param __count\n"
17196"/// An integer value specifying the number of bits to left-shift each value\n"
17197"/// in operand \\a __a.\n"
17198"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17199"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17200"_mm_slli_epi16(__m128i __a, int __count)\n"
17201"{\n"
17202" return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);\n"
17203"}\n"
17204"\n"
17205"/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n"
17206"/// by the specified number of bits. Low-order bits are cleared.\n"
17207"///\n"
17208"/// \\headerfile <x86intrin.h>\n"
17209"///\n"
17210"/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n"
17211"///\n"
17212"/// \\param __a\n"
17213"/// A 128-bit integer vector containing the source operand.\n"
17214"/// \\param __count\n"
17215"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17216"/// to left-shift each value in operand \\a __a.\n"
17217"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17218"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17219"_mm_sll_epi16(__m128i __a, __m128i __count)\n"
17220"{\n"
17221" return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);\n"
17222"}\n"
17223"\n"
17224"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17225"/// by the specified number of bits. Low-order bits are cleared.\n"
17226"///\n"
17227"/// \\headerfile <x86intrin.h>\n"
17228"///\n"
17229"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17230"///\n"
17231"/// \\param __a\n"
17232"/// A 128-bit integer vector containing the source operand.\n"
17233"/// \\param __count\n"
17234"/// An integer value specifying the number of bits to left-shift each value\n"
17235"/// in operand \\a __a.\n"
17236"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17237"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17238"_mm_slli_epi32(__m128i __a, int __count)\n"
17239"{\n"
17240" return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);\n"
17241"}\n"
17242"\n"
17243"/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n"
17244"/// by the specified number of bits. Low-order bits are cleared.\n"
17245"///\n"
17246"/// \\headerfile <x86intrin.h>\n"
17247"///\n"
17248"/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n"
17249"///\n"
17250"/// \\param __a\n"
17251"/// A 128-bit integer vector containing the source operand.\n"
17252"/// \\param __count\n"
17253"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17254"/// to left-shift each value in operand \\a __a.\n"
17255"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17256"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17257"_mm_sll_epi32(__m128i __a, __m128i __count)\n"
17258"{\n"
17259" return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);\n"
17260"}\n"
17261"\n"
17262"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17263"/// by the specified number of bits. Low-order bits are cleared.\n"
17264"///\n"
17265"/// \\headerfile <x86intrin.h>\n"
17266"///\n"
17267"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17268"///\n"
17269"/// \\param __a\n"
17270"/// A 128-bit integer vector containing the source operand.\n"
17271"/// \\param __count\n"
17272"/// An integer value specifying the number of bits to left-shift each value\n"
17273"/// in operand \\a __a.\n"
17274"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17275"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17276"_mm_slli_epi64(__m128i __a, int __count)\n"
17277"{\n"
17278" return __builtin_ia32_psllqi128((__v2di)__a, __count);\n"
17279"}\n"
17280"\n"
17281"/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n"
17282"/// by the specified number of bits. Low-order bits are cleared.\n"
17283"///\n"
17284"/// \\headerfile <x86intrin.h>\n"
17285"///\n"
17286"/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n"
17287"///\n"
17288"/// \\param __a\n"
17289"/// A 128-bit integer vector containing the source operand.\n"
17290"/// \\param __count\n"
17291"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17292"/// to left-shift each value in operand \\a __a.\n"
17293"/// \\returns A 128-bit integer vector containing the left-shifted values.\n"
17294"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17295"_mm_sll_epi64(__m128i __a, __m128i __count)\n"
17296"{\n"
17297" return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);\n"
17298"}\n"
17299"\n"
17300"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17301"/// by the specified number of bits. High-order bits are filled with the sign\n"
17302"/// bit of the initial value.\n"
17303"///\n"
17304"/// \\headerfile <x86intrin.h>\n"
17305"///\n"
17306"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17307"///\n"
17308"/// \\param __a\n"
17309"/// A 128-bit integer vector containing the source operand.\n"
17310"/// \\param __count\n"
17311"/// An integer value specifying the number of bits to right-shift each value\n"
17312"/// in operand \\a __a.\n"
17313"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17314"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17315"_mm_srai_epi16(__m128i __a, int __count)\n"
17316"{\n"
17317" return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);\n"
17318"}\n"
17319"\n"
17320"/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n"
17321"/// by the specified number of bits. High-order bits are filled with the sign\n"
17322"/// bit of the initial value.\n"
17323"///\n"
17324"/// \\headerfile <x86intrin.h>\n"
17325"///\n"
17326"/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n"
17327"///\n"
17328"/// \\param __a\n"
17329"/// A 128-bit integer vector containing the source operand.\n"
17330"/// \\param __count\n"
17331"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17332"/// to right-shift each value in operand \\a __a.\n"
17333"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17334"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17335"_mm_sra_epi16(__m128i __a, __m128i __count)\n"
17336"{\n"
17337" return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);\n"
17338"}\n"
17339"\n"
17340"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17341"/// by the specified number of bits. High-order bits are filled with the sign\n"
17342"/// bit of the initial value.\n"
17343"///\n"
17344"/// \\headerfile <x86intrin.h>\n"
17345"///\n"
17346"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17347"///\n"
17348"/// \\param __a\n"
17349"/// A 128-bit integer vector containing the source operand.\n"
17350"/// \\param __count\n"
17351"/// An integer value specifying the number of bits to right-shift each value\n"
17352"/// in operand \\a __a.\n"
17353"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17354"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17355"_mm_srai_epi32(__m128i __a, int __count)\n"
17356"{\n"
17357" return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);\n"
17358"}\n"
17359"\n"
17360"/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n"
17361"/// by the specified number of bits. High-order bits are filled with the sign\n"
17362"/// bit of the initial value.\n"
17363"///\n"
17364"/// \\headerfile <x86intrin.h>\n"
17365"///\n"
17366"/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n"
17367"///\n"
17368"/// \\param __a\n"
17369"/// A 128-bit integer vector containing the source operand.\n"
17370"/// \\param __count\n"
17371"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17372"/// to right-shift each value in operand \\a __a.\n"
17373"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17374"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17375"_mm_sra_epi32(__m128i __a, __m128i __count)\n"
17376"{\n"
17377" return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);\n"
17378"}\n"
17379"\n"
17380"/// Right-shifts the 128-bit integer vector operand by the specified\n"
17381"/// number of bytes. High-order bits are cleared.\n"
17382"///\n"
17383"/// \\headerfile <x86intrin.h>\n"
17384"///\n"
17385"/// \\code\n"
17386"/// __m128i _mm_srli_si128(__m128i a, const int imm);\n"
17387"/// \\endcode\n"
17388"///\n"
17389"/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.\n"
17390"///\n"
17391"/// \\param a\n"
17392"/// A 128-bit integer vector containing the source operand.\n"
17393"/// \\param imm\n"
17394"/// An immediate value specifying the number of bytes to right-shift operand\n"
17395"/// \\a a.\n"
17396"/// \\returns A 128-bit integer vector containing the right-shifted value.\n"
17397"#define _mm_srli_si128(a, imm) \\\n"
17398" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17399"\n"
17400"#define _mm_bsrli_si128(a, imm) \\\n"
17401" (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n"
17402"\n"
17403"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17404"/// operand by the specified number of bits. High-order bits are cleared.\n"
17405"///\n"
17406"/// \\headerfile <x86intrin.h>\n"
17407"///\n"
17408"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17409"///\n"
17410"/// \\param __a\n"
17411"/// A 128-bit integer vector containing the source operand.\n"
17412"/// \\param __count\n"
17413"/// An integer value specifying the number of bits to right-shift each value\n"
17414"/// in operand \\a __a.\n"
17415"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17416"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17417"_mm_srli_epi16(__m128i __a, int __count)\n"
17418"{\n"
17419" return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);\n"
17420"}\n"
17421"\n"
17422"/// Right-shifts each of 16-bit values in the 128-bit integer vector\n"
17423"/// operand by the specified number of bits. High-order bits are cleared.\n"
17424"///\n"
17425"/// \\headerfile <x86intrin.h>\n"
17426"///\n"
17427"/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n"
17428"///\n"
17429"/// \\param __a\n"
17430"/// A 128-bit integer vector containing the source operand.\n"
17431"/// \\param __count\n"
17432"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17433"/// to right-shift each value in operand \\a __a.\n"
17434"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17435"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17436"_mm_srl_epi16(__m128i __a, __m128i __count)\n"
17437"{\n"
17438" return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);\n"
17439"}\n"
17440"\n"
17441"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17442"/// operand by the specified number of bits. High-order bits are cleared.\n"
17443"///\n"
17444"/// \\headerfile <x86intrin.h>\n"
17445"///\n"
17446"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17447"///\n"
17448"/// \\param __a\n"
17449"/// A 128-bit integer vector containing the source operand.\n"
17450"/// \\param __count\n"
17451"/// An integer value specifying the number of bits to right-shift each value\n"
17452"/// in operand \\a __a.\n"
17453"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17454"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17455"_mm_srli_epi32(__m128i __a, int __count)\n"
17456"{\n"
17457" return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);\n"
17458"}\n"
17459"\n"
17460"/// Right-shifts each of 32-bit values in the 128-bit integer vector\n"
17461"/// operand by the specified number of bits. High-order bits are cleared.\n"
17462"///\n"
17463"/// \\headerfile <x86intrin.h>\n"
17464"///\n"
17465"/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n"
17466"///\n"
17467"/// \\param __a\n"
17468"/// A 128-bit integer vector containing the source operand.\n"
17469"/// \\param __count\n"
17470"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17471"/// to right-shift each value in operand \\a __a.\n"
17472"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17473"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17474"_mm_srl_epi32(__m128i __a, __m128i __count)\n"
17475"{\n"
17476" return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);\n"
17477"}\n"
17478"\n"
17479"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17480"/// operand by the specified number of bits. High-order bits are cleared.\n"
17481"///\n"
17482"/// \\headerfile <x86intrin.h>\n"
17483"///\n"
17484"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17485"///\n"
17486"/// \\param __a\n"
17487"/// A 128-bit integer vector containing the source operand.\n"
17488"/// \\param __count\n"
17489"/// An integer value specifying the number of bits to right-shift each value\n"
17490"/// in operand \\a __a.\n"
17491"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17492"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17493"_mm_srli_epi64(__m128i __a, int __count)\n"
17494"{\n"
17495" return __builtin_ia32_psrlqi128((__v2di)__a, __count);\n"
17496"}\n"
17497"\n"
17498"/// Right-shifts each of 64-bit values in the 128-bit integer vector\n"
17499"/// operand by the specified number of bits. High-order bits are cleared.\n"
17500"///\n"
17501"/// \\headerfile <x86intrin.h>\n"
17502"///\n"
17503"/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n"
17504"///\n"
17505"/// \\param __a\n"
17506"/// A 128-bit integer vector containing the source operand.\n"
17507"/// \\param __count\n"
17508"/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n"
17509"/// to right-shift each value in operand \\a __a.\n"
17510"/// \\returns A 128-bit integer vector containing the right-shifted values.\n"
17511"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17512"_mm_srl_epi64(__m128i __a, __m128i __count)\n"
17513"{\n"
17514" return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);\n"
17515"}\n"
17516"\n"
17517"/// Compares each of the corresponding 8-bit values of the 128-bit\n"
17518"/// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF\n"
17519"/// for true.\n"
17520"///\n"
17521"/// \\headerfile <x86intrin.h>\n"
17522"///\n"
17523"/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.\n"
17524"///\n"
17525"/// \\param __a\n"
17526"/// A 128-bit integer vector.\n"
17527"/// \\param __b\n"
17528"/// A 128-bit integer vector.\n"
17529"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17530"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17531"_mm_cmpeq_epi8(__m128i __a, __m128i __b)\n"
17532"{\n"
17533" return (__m128i)((__v16qi)__a == (__v16qi)__b);\n"
17534"}\n"
17535"\n"
17536"/// Compares each of the corresponding 16-bit values of the 128-bit\n"
17537"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17538"/// 0xFFFF for true.\n"
17539"///\n"
17540"/// \\headerfile <x86intrin.h>\n"
17541"///\n"
17542"/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.\n"
17543"///\n"
17544"/// \\param __a\n"
17545"/// A 128-bit integer vector.\n"
17546"/// \\param __b\n"
17547"/// A 128-bit integer vector.\n"
17548"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17549"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17550"_mm_cmpeq_epi16(__m128i __a, __m128i __b)\n"
17551"{\n"
17552" return (__m128i)((__v8hi)__a == (__v8hi)__b);\n"
17553"}\n"
17554"\n"
17555"/// Compares each of the corresponding 32-bit values of the 128-bit\n"
17556"/// integer vectors for equality. Each comparison yields 0x0 for false,\n"
17557"/// 0xFFFFFFFF for true.\n"
17558"///\n"
17559"/// \\headerfile <x86intrin.h>\n"
17560"///\n"
17561"/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.\n"
17562"///\n"
17563"/// \\param __a\n"
17564"/// A 128-bit integer vector.\n"
17565"/// \\param __b\n"
17566"/// A 128-bit integer vector.\n"
17567"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17568"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17569"_mm_cmpeq_epi32(__m128i __a, __m128i __b)\n"
17570"{\n"
17571" return (__m128i)((__v4si)__a == (__v4si)__b);\n"
17572"}\n"
17573"\n"
17574"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17575"/// integer vectors to determine if the values in the first operand are\n"
17576"/// greater than those in the second operand. Each comparison yields 0x0 for\n"
17577"/// false, 0xFF for true.\n"
17578"///\n"
17579"/// \\headerfile <x86intrin.h>\n"
17580"///\n"
17581"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17582"///\n"
17583"/// \\param __a\n"
17584"/// A 128-bit integer vector.\n"
17585"/// \\param __b\n"
17586"/// A 128-bit integer vector.\n"
17587"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17588"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17589"_mm_cmpgt_epi8(__m128i __a, __m128i __b)\n"
17590"{\n"
17591" /* This function always performs a signed comparison, but __v16qi is a char\n"
17592" which may be signed or unsigned, so use __v16qs. */\n"
17593" return (__m128i)((__v16qs)__a > (__v16qs)__b);\n"
17594"}\n"
17595"\n"
17596"/// Compares each of the corresponding signed 16-bit values of the\n"
17597"/// 128-bit integer vectors to determine if the values in the first operand\n"
17598"/// are greater than those in the second operand.\n"
17599"///\n"
17600"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17601"///\n"
17602"/// \\headerfile <x86intrin.h>\n"
17603"///\n"
17604"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17605"///\n"
17606"/// \\param __a\n"
17607"/// A 128-bit integer vector.\n"
17608"/// \\param __b\n"
17609"/// A 128-bit integer vector.\n"
17610"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17611"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17612"_mm_cmpgt_epi16(__m128i __a, __m128i __b)\n"
17613"{\n"
17614" return (__m128i)((__v8hi)__a > (__v8hi)__b);\n"
17615"}\n"
17616"\n"
17617"/// Compares each of the corresponding signed 32-bit values of the\n"
17618"/// 128-bit integer vectors to determine if the values in the first operand\n"
17619"/// are greater than those in the second operand.\n"
17620"///\n"
17621"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17622"///\n"
17623"/// \\headerfile <x86intrin.h>\n"
17624"///\n"
17625"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17626"///\n"
17627"/// \\param __a\n"
17628"/// A 128-bit integer vector.\n"
17629"/// \\param __b\n"
17630"/// A 128-bit integer vector.\n"
17631"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17632"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17633"_mm_cmpgt_epi32(__m128i __a, __m128i __b)\n"
17634"{\n"
17635" return (__m128i)((__v4si)__a > (__v4si)__b);\n"
17636"}\n"
17637"\n"
17638"/// Compares each of the corresponding signed 8-bit values of the 128-bit\n"
17639"/// integer vectors to determine if the values in the first operand are less\n"
17640"/// than those in the second operand.\n"
17641"///\n"
17642"/// Each comparison yields 0x0 for false, 0xFF for true.\n"
17643"///\n"
17644"/// \\headerfile <x86intrin.h>\n"
17645"///\n"
17646"/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n"
17647"///\n"
17648"/// \\param __a\n"
17649"/// A 128-bit integer vector.\n"
17650"/// \\param __b\n"
17651"/// A 128-bit integer vector.\n"
17652"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17653"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17654"_mm_cmplt_epi8(__m128i __a, __m128i __b)\n"
17655"{\n"
17656" return _mm_cmpgt_epi8(__b, __a);\n"
17657"}\n"
17658"\n"
17659"/// Compares each of the corresponding signed 16-bit values of the\n"
17660"/// 128-bit integer vectors to determine if the values in the first operand\n"
17661"/// are less than those in the second operand.\n"
17662"///\n"
17663"/// Each comparison yields 0x0 for false, 0xFFFF for true.\n"
17664"///\n"
17665"/// \\headerfile <x86intrin.h>\n"
17666"///\n"
17667"/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n"
17668"///\n"
17669"/// \\param __a\n"
17670"/// A 128-bit integer vector.\n"
17671"/// \\param __b\n"
17672"/// A 128-bit integer vector.\n"
17673"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17674"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17675"_mm_cmplt_epi16(__m128i __a, __m128i __b)\n"
17676"{\n"
17677" return _mm_cmpgt_epi16(__b, __a);\n"
17678"}\n"
17679"\n"
17680"/// Compares each of the corresponding signed 32-bit values of the\n"
17681"/// 128-bit integer vectors to determine if the values in the first operand\n"
17682"/// are less than those in the second operand.\n"
17683"///\n"
17684"/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n"
17685"///\n"
17686"/// \\headerfile <x86intrin.h>\n"
17687"///\n"
17688"/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n"
17689"///\n"
17690"/// \\param __a\n"
17691"/// A 128-bit integer vector.\n"
17692"/// \\param __b\n"
17693"/// A 128-bit integer vector.\n"
17694"/// \\returns A 128-bit integer vector containing the comparison results.\n"
17695"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17696"_mm_cmplt_epi32(__m128i __a, __m128i __b)\n"
17697"{\n"
17698" return _mm_cmpgt_epi32(__b, __a);\n"
17699"}\n"
17700"\n"
17701"#ifdef __x86_64__\n"
17702"/// Converts a 64-bit signed integer value from the second operand into a\n"
17703"/// double-precision value and returns it in the lower element of a [2 x\n"
17704"/// double] vector; the upper element of the returned vector is copied from\n"
17705"/// the upper element of the first operand.\n"
17706"///\n"
17707"/// \\headerfile <x86intrin.h>\n"
17708"///\n"
17709"/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n"
17710"///\n"
17711"/// \\param __a\n"
17712"/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are\n"
17713"/// copied to the upper 64 bits of the destination.\n"
17714"/// \\param __b\n"
17715"/// A 64-bit signed integer operand containing the value to be converted.\n"
17716"/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n"
17717"/// converted value of the second operand. The upper 64 bits are copied from\n"
17718"/// the upper 64 bits of the first operand.\n"
17719"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
17720"_mm_cvtsi64_sd(__m128d __a, long long __b)\n"
17721"{\n"
17722" __a[0] = __b;\n"
17723" return __a;\n"
17724"}\n"
17725"\n"
17726"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17727"/// 64-bit signed integer value, according to the current rounding mode.\n"
17728"///\n"
17729"/// \\headerfile <x86intrin.h>\n"
17730"///\n"
17731"/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n"
17732"///\n"
17733"/// \\param __a\n"
17734"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17735"/// conversion.\n"
17736"/// \\returns A 64-bit signed integer containing the converted value.\n"
17737"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17738"_mm_cvtsd_si64(__m128d __a)\n"
17739"{\n"
17740" return __builtin_ia32_cvtsd2si64((__v2df)__a);\n"
17741"}\n"
17742"\n"
17743"/// Converts the first (lower) element of a vector of [2 x double] into a\n"
17744"/// 64-bit signed integer value, truncating the result when it is inexact.\n"
17745"///\n"
17746"/// \\headerfile <x86intrin.h>\n"
17747"///\n"
17748"/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n"
17749"/// instruction.\n"
17750"///\n"
17751"/// \\param __a\n"
17752"/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n"
17753"/// conversion.\n"
17754"/// \\returns A 64-bit signed integer containing the converted value.\n"
17755"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17756"_mm_cvttsd_si64(__m128d __a)\n"
17757"{\n"
17758" return __builtin_ia32_cvttsd2si64((__v2df)__a);\n"
17759"}\n"
17760"#endif\n"
17761"\n"
17762"/// Converts a vector of [4 x i32] into a vector of [4 x float].\n"
17763"///\n"
17764"/// \\headerfile <x86intrin.h>\n"
17765"///\n"
17766"/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.\n"
17767"///\n"
17768"/// \\param __a\n"
17769"/// A 128-bit integer vector.\n"
17770"/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n"
17771"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
17772"_mm_cvtepi32_ps(__m128i __a)\n"
17773"{\n"
17774" return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);\n"
17775"}\n"
17776"\n"
17777"/// Converts a vector of [4 x float] into a vector of [4 x i32].\n"
17778"///\n"
17779"/// \\headerfile <x86intrin.h>\n"
17780"///\n"
17781"/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.\n"
17782"///\n"
17783"/// \\param __a\n"
17784"/// A 128-bit vector of [4 x float].\n"
17785"/// \\returns A 128-bit integer vector of [4 x i32] containing the converted\n"
17786"/// values.\n"
17787"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17788"_mm_cvtps_epi32(__m128 __a)\n"
17789"{\n"
17790" return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);\n"
17791"}\n"
17792"\n"
17793"/// Converts a vector of [4 x float] into a vector of [4 x i32],\n"
17794"/// truncating the result when it is inexact.\n"
17795"///\n"
17796"/// \\headerfile <x86intrin.h>\n"
17797"///\n"
17798"/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>\n"
17799"/// instruction.\n"
17800"///\n"
17801"/// \\param __a\n"
17802"/// A 128-bit vector of [4 x float].\n"
17803"/// \\returns A 128-bit vector of [4 x i32] containing the converted values.\n"
17804"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17805"_mm_cvttps_epi32(__m128 __a)\n"
17806"{\n"
17807" return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);\n"
17808"}\n"
17809"\n"
17810"/// Returns a vector of [4 x i32] where the lowest element is the input\n"
17811"/// operand and the remaining elements are zero.\n"
17812"///\n"
17813"/// \\headerfile <x86intrin.h>\n"
17814"///\n"
17815"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17816"///\n"
17817"/// \\param __a\n"
17818"/// A 32-bit signed integer operand.\n"
17819"/// \\returns A 128-bit vector of [4 x i32].\n"
17820"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17821"_mm_cvtsi32_si128(int __a)\n"
17822"{\n"
17823" return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };\n"
17824"}\n"
17825"\n"
17826"#ifdef __x86_64__\n"
17827"/// Returns a vector of [2 x i64] where the lower element is the input\n"
17828"/// operand and the upper element is zero.\n"
17829"///\n"
17830"/// \\headerfile <x86intrin.h>\n"
17831"///\n"
17832"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17833"///\n"
17834"/// \\param __a\n"
17835"/// A 64-bit signed integer operand containing the value to be converted.\n"
17836"/// \\returns A 128-bit vector of [2 x i64] containing the converted value.\n"
17837"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17838"_mm_cvtsi64_si128(long long __a)\n"
17839"{\n"
17840" return __extension__ (__m128i)(__v2di){ __a, 0 };\n"
17841"}\n"
17842"#endif\n"
17843"\n"
17844"/// Moves the least significant 32 bits of a vector of [4 x i32] to a\n"
17845"/// 32-bit signed integer value.\n"
17846"///\n"
17847"/// \\headerfile <x86intrin.h>\n"
17848"///\n"
17849"/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n"
17850"///\n"
17851"/// \\param __a\n"
17852"/// A vector of [4 x i32]. The least significant 32 bits are moved to the\n"
17853"/// destination.\n"
17854"/// \\returns A 32-bit signed integer containing the moved value.\n"
17855"static __inline__ int __DEFAULT_FN_ATTRS\n"
17856"_mm_cvtsi128_si32(__m128i __a)\n"
17857"{\n"
17858" __v4si __b = (__v4si)__a;\n"
17859" return __b[0];\n"
17860"}\n"
17861"\n"
17862"#ifdef __x86_64__\n"
17863"/// Moves the least significant 64 bits of a vector of [2 x i64] to a\n"
17864"/// 64-bit signed integer value.\n"
17865"///\n"
17866"/// \\headerfile <x86intrin.h>\n"
17867"///\n"
17868"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17869"///\n"
17870"/// \\param __a\n"
17871"/// A vector of [2 x i64]. The least significant 64 bits are moved to the\n"
17872"/// destination.\n"
17873"/// \\returns A 64-bit signed integer containing the moved value.\n"
17874"static __inline__ long long __DEFAULT_FN_ATTRS\n"
17875"_mm_cvtsi128_si64(__m128i __a)\n"
17876"{\n"
17877" return __a[0];\n"
17878"}\n"
17879"#endif\n"
17880"\n"
17881"/// Moves packed integer values from an aligned 128-bit memory location\n"
17882"/// to elements in a 128-bit integer vector.\n"
17883"///\n"
17884"/// \\headerfile <x86intrin.h>\n"
17885"///\n"
17886"/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.\n"
17887"///\n"
17888"/// \\param __p\n"
17889"/// An aligned pointer to a memory location containing integer values.\n"
17890"/// \\returns A 128-bit integer vector containing the moved values.\n"
17891"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17892"_mm_load_si128(__m128i const *__p)\n"
17893"{\n"
17894" return *__p;\n"
17895"}\n"
17896"\n"
17897"/// Moves packed integer values from an unaligned 128-bit memory location\n"
17898"/// to elements in a 128-bit integer vector.\n"
17899"///\n"
17900"/// \\headerfile <x86intrin.h>\n"
17901"///\n"
17902"/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.\n"
17903"///\n"
17904"/// \\param __p\n"
17905"/// A pointer to a memory location containing integer values.\n"
17906"/// \\returns A 128-bit integer vector containing the moved values.\n"
17907"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17908"_mm_loadu_si128(__m128i const *__p)\n"
17909"{\n"
17910" struct __loadu_si128 {\n"
17911" __m128i __v;\n"
17912" } __attribute__((__packed__, __may_alias__));\n"
17913" return ((struct __loadu_si128*)__p)->__v;\n"
17914"}\n"
17915"\n"
17916"/// Returns a vector of [2 x i64] where the lower element is taken from\n"
17917"/// the lower element of the operand, and the upper element is zero.\n"
17918"///\n"
17919"/// \\headerfile <x86intrin.h>\n"
17920"///\n"
17921"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
17922"///\n"
17923"/// \\param __p\n"
17924"/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of\n"
17925"/// the destination.\n"
17926"/// \\returns A 128-bit vector of [2 x i64]. The lower order bits contain the\n"
17927"/// moved value. The higher order bits are cleared.\n"
17928"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17929"_mm_loadl_epi64(__m128i const *__p)\n"
17930"{\n"
17931" struct __mm_loadl_epi64_struct {\n"
17932" long long __u;\n"
17933" } __attribute__((__packed__, __may_alias__));\n"
17934" return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};\n"
17935"}\n"
17936"\n"
17937"/// Generates a 128-bit vector of [4 x i32] with unspecified content.\n"
17938"/// This could be used as an argument to another intrinsic function where the\n"
17939"/// argument is required but the value is not actually used.\n"
17940"///\n"
17941"/// \\headerfile <x86intrin.h>\n"
17942"///\n"
17943"/// This intrinsic has no corresponding instruction.\n"
17944"///\n"
17945"/// \\returns A 128-bit vector of [4 x i32] with unspecified content.\n"
17946"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17947"_mm_undefined_si128(void)\n"
17948"{\n"
17949" return (__m128i)__builtin_ia32_undef128();\n"
17950"}\n"
17951"\n"
17952"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
17953"/// the specified 64-bit integer values.\n"
17954"///\n"
17955"/// \\headerfile <x86intrin.h>\n"
17956"///\n"
17957"/// This intrinsic is a utility function and does not correspond to a specific\n"
17958"/// instruction.\n"
17959"///\n"
17960"/// \\param __q1\n"
17961"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
17962"/// destination vector of [2 x i64].\n"
17963"/// \\param __q0\n"
17964"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
17965"/// destination vector of [2 x i64].\n"
17966"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
17967"/// provided in the operands.\n"
17968"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17969"_mm_set_epi64x(long long __q1, long long __q0)\n"
17970"{\n"
17971" return __extension__ (__m128i)(__v2di){ __q0, __q1 };\n"
17972"}\n"
17973"\n"
17974"/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n"
17975"/// the specified 64-bit integer values.\n"
17976"///\n"
17977"/// \\headerfile <x86intrin.h>\n"
17978"///\n"
17979"/// This intrinsic is a utility function and does not correspond to a specific\n"
17980"/// instruction.\n"
17981"///\n"
17982"/// \\param __q1\n"
17983"/// A 64-bit integer value used to initialize the upper 64 bits of the\n"
17984"/// destination vector of [2 x i64].\n"
17985"/// \\param __q0\n"
17986"/// A 64-bit integer value used to initialize the lower 64 bits of the\n"
17987"/// destination vector of [2 x i64].\n"
17988"/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n"
17989"/// provided in the operands.\n"
17990"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
17991"_mm_set_epi64(__m64 __q1, __m64 __q0)\n"
17992"{\n"
17993" return _mm_set_epi64x((long long)__q1, (long long)__q0);\n"
17994"}\n"
17995"\n"
17996"/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with\n"
17997"/// the specified 32-bit integer values.\n"
17998"///\n"
17999"/// \\headerfile <x86intrin.h>\n"
18000"///\n"
18001"/// This intrinsic is a utility function and does not correspond to a specific\n"
18002"/// instruction.\n"
18003"///\n"
18004"/// \\param __i3\n"
18005"/// A 32-bit integer value used to initialize bits [127:96] of the\n"
18006"/// destination vector.\n"
18007"/// \\param __i2\n"
18008"/// A 32-bit integer value used to initialize bits [95:64] of the destination\n"
18009"/// vector.\n"
18010"/// \\param __i1\n"
18011"/// A 32-bit integer value used to initialize bits [63:32] of the destination\n"
18012"/// vector.\n"
18013"/// \\param __i0\n"
18014"/// A 32-bit integer value used to initialize bits [31:0] of the destination\n"
18015"/// vector.\n"
18016"/// \\returns An initialized 128-bit vector of [4 x i32] containing the values\n"
18017"/// provided in the operands.\n"
18018"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18019"_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)\n"
18020"{\n"
18021" return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};\n"
18022"}\n"
18023"\n"
18024"/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with\n"
18025"/// the specified 16-bit integer values.\n"
18026"///\n"
18027"/// \\headerfile <x86intrin.h>\n"
18028"///\n"
18029"/// This intrinsic is a utility function and does not correspond to a specific\n"
18030"/// instruction.\n"
18031"///\n"
18032"/// \\param __w7\n"
18033"/// A 16-bit integer value used to initialize bits [127:112] of the\n"
18034"/// destination vector.\n"
18035"/// \\param __w6\n"
18036"/// A 16-bit integer value used to initialize bits [111:96] of the\n"
18037"/// destination vector.\n"
18038"/// \\param __w5\n"
18039"/// A 16-bit integer value used to initialize bits [95:80] of the destination\n"
18040"/// vector.\n"
18041"/// \\param __w4\n"
18042"/// A 16-bit integer value used to initialize bits [79:64] of the destination\n"
18043"/// vector.\n"
18044"/// \\param __w3\n"
18045"/// A 16-bit integer value used to initialize bits [63:48] of the destination\n"
18046"/// vector.\n"
18047"/// \\param __w2\n"
18048"/// A 16-bit integer value used to initialize bits [47:32] of the destination\n"
18049"/// vector.\n"
18050"/// \\param __w1\n"
18051"/// A 16-bit integer value used to initialize bits [31:16] of the destination\n"
18052"/// vector.\n"
18053"/// \\param __w0\n"
18054"/// A 16-bit integer value used to initialize bits [15:0] of the destination\n"
18055"/// vector.\n"
18056"/// \\returns An initialized 128-bit vector of [8 x i16] containing the values\n"
18057"/// provided in the operands.\n"
18058"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18059"_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)\n"
18060"{\n"
18061" return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };\n"
18062"}\n"
18063"\n"
18064"/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with\n"
18065"/// the specified 8-bit integer values.\n"
18066"///\n"
18067"/// \\headerfile <x86intrin.h>\n"
18068"///\n"
18069"/// This intrinsic is a utility function and does not correspond to a specific\n"
18070"/// instruction.\n"
18071"///\n"
18072"/// \\param __b15\n"
18073"/// Initializes bits [127:120] of the destination vector.\n"
18074"/// \\param __b14\n"
18075"/// Initializes bits [119:112] of the destination vector.\n"
18076"/// \\param __b13\n"
18077"/// Initializes bits [111:104] of the destination vector.\n"
18078"/// \\param __b12\n"
18079"/// Initializes bits [103:96] of the destination vector.\n"
18080"/// \\param __b11\n"
18081"/// Initializes bits [95:88] of the destination vector.\n"
18082"/// \\param __b10\n"
18083"/// Initializes bits [87:80] of the destination vector.\n"
18084"/// \\param __b9\n"
18085"/// Initializes bits [79:72] of the destination vector.\n"
18086"/// \\param __b8\n"
18087"/// Initializes bits [71:64] of the destination vector.\n"
18088"/// \\param __b7\n"
18089"/// Initializes bits [63:56] of the destination vector.\n"
18090"/// \\param __b6\n"
18091"/// Initializes bits [55:48] of the destination vector.\n"
18092"/// \\param __b5\n"
18093"/// Initializes bits [47:40] of the destination vector.\n"
18094"/// \\param __b4\n"
18095"/// Initializes bits [39:32] of the destination vector.\n"
18096"/// \\param __b3\n"
18097"/// Initializes bits [31:24] of the destination vector.\n"
18098"/// \\param __b2\n"
18099"/// Initializes bits [23:16] of the destination vector.\n"
18100"/// \\param __b1\n"
18101"/// Initializes bits [15:8] of the destination vector.\n"
18102"/// \\param __b0\n"
18103"/// Initializes bits [7:0] of the destination vector.\n"
18104"/// \\returns An initialized 128-bit vector of [16 x i8] containing the values\n"
18105"/// provided in the operands.\n"
18106"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18107"_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)\n"
18108"{\n"
18109" return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };\n"
18110"}\n"
18111"\n"
18112"/// Initializes both values in a 128-bit integer vector with the\n"
18113"/// specified 64-bit integer value.\n"
18114"///\n"
18115"/// \\headerfile <x86intrin.h>\n"
18116"///\n"
18117"/// This intrinsic is a utility function and does not correspond to a specific\n"
18118"/// instruction.\n"
18119"///\n"
18120"/// \\param __q\n"
18121"/// Integer value used to initialize the elements of the destination integer\n"
18122"/// vector.\n"
18123"/// \\returns An initialized 128-bit integer vector of [2 x i64] with both\n"
18124"/// elements containing the value provided in the operand.\n"
18125"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18126"_mm_set1_epi64x(long long __q)\n"
18127"{\n"
18128" return _mm_set_epi64x(__q, __q);\n"
18129"}\n"
18130"\n"
18131"/// Initializes both values in a 128-bit vector of [2 x i64] with the\n"
18132"/// specified 64-bit value.\n"
18133"///\n"
18134"/// \\headerfile <x86intrin.h>\n"
18135"///\n"
18136"/// This intrinsic is a utility function and does not correspond to a specific\n"
18137"/// instruction.\n"
18138"///\n"
18139"/// \\param __q\n"
18140"/// A 64-bit value used to initialize the elements of the destination integer\n"
18141"/// vector.\n"
18142"/// \\returns An initialized 128-bit vector of [2 x i64] with all elements\n"
18143"/// containing the value provided in the operand.\n"
18144"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18145"_mm_set1_epi64(__m64 __q)\n"
18146"{\n"
18147" return _mm_set_epi64(__q, __q);\n"
18148"}\n"
18149"\n"
18150"/// Initializes all values in a 128-bit vector of [4 x i32] with the\n"
18151"/// specified 32-bit value.\n"
18152"///\n"
18153"/// \\headerfile <x86intrin.h>\n"
18154"///\n"
18155"/// This intrinsic is a utility function and does not correspond to a specific\n"
18156"/// instruction.\n"
18157"///\n"
18158"/// \\param __i\n"
18159"/// A 32-bit value used to initialize the elements of the destination integer\n"
18160"/// vector.\n"
18161"/// \\returns An initialized 128-bit vector of [4 x i32] with all elements\n"
18162"/// containing the value provided in the operand.\n"
18163"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18164"_mm_set1_epi32(int __i)\n"
18165"{\n"
18166" return _mm_set_epi32(__i, __i, __i, __i);\n"
18167"}\n"
18168"\n"
18169"/// Initializes all values in a 128-bit vector of [8 x i16] with the\n"
18170"/// specified 16-bit value.\n"
18171"///\n"
18172"/// \\headerfile <x86intrin.h>\n"
18173"///\n"
18174"/// This intrinsic is a utility function and does not correspond to a specific\n"
18175"/// instruction.\n"
18176"///\n"
18177"/// \\param __w\n"
18178"/// A 16-bit value used to initialize the elements of the destination integer\n"
18179"/// vector.\n"
18180"/// \\returns An initialized 128-bit vector of [8 x i16] with all elements\n"
18181"/// containing the value provided in the operand.\n"
18182"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18183"_mm_set1_epi16(short __w)\n"
18184"{\n"
18185" return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);\n"
18186"}\n"
18187"\n"
18188"/// Initializes all values in a 128-bit vector of [16 x i8] with the\n"
18189"/// specified 8-bit value.\n"
18190"///\n"
18191"/// \\headerfile <x86intrin.h>\n"
18192"///\n"
18193"/// This intrinsic is a utility function and does not correspond to a specific\n"
18194"/// instruction.\n"
18195"///\n"
18196"/// \\param __b\n"
18197"/// An 8-bit value used to initialize the elements of the destination integer\n"
18198"/// vector.\n"
18199"/// \\returns An initialized 128-bit vector of [16 x i8] with all elements\n"
18200"/// containing the value provided in the operand.\n"
18201"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18202"_mm_set1_epi8(char __b)\n"
18203"{\n"
18204" return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);\n"
18205"}\n"
18206"\n"
18207"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18208"/// with the specified 64-bit integral values.\n"
18209"///\n"
18210"/// \\headerfile <x86intrin.h>\n"
18211"///\n"
18212"/// This intrinsic does not correspond to a specific instruction.\n"
18213"///\n"
18214"/// \\param __q0\n"
18215"/// A 64-bit integral value used to initialize the lower 64 bits of the\n"
18216"/// result.\n"
18217"/// \\param __q1\n"
18218"/// A 64-bit integral value used to initialize the upper 64 bits of the\n"
18219"/// result.\n"
18220"/// \\returns An initialized 128-bit integer vector.\n"
18221"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18222"_mm_setr_epi64(__m64 __q0, __m64 __q1)\n"
18223"{\n"
18224" return _mm_set_epi64(__q1, __q0);\n"
18225"}\n"
18226"\n"
18227"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18228"/// with the specified 32-bit integral values.\n"
18229"///\n"
18230"/// \\headerfile <x86intrin.h>\n"
18231"///\n"
18232"/// This intrinsic is a utility function and does not correspond to a specific\n"
18233"/// instruction.\n"
18234"///\n"
18235"/// \\param __i0\n"
18236"/// A 32-bit integral value used to initialize bits [31:0] of the result.\n"
18237"/// \\param __i1\n"
18238"/// A 32-bit integral value used to initialize bits [63:32] of the result.\n"
18239"/// \\param __i2\n"
18240"/// A 32-bit integral value used to initialize bits [95:64] of the result.\n"
18241"/// \\param __i3\n"
18242"/// A 32-bit integral value used to initialize bits [127:96] of the result.\n"
18243"/// \\returns An initialized 128-bit integer vector.\n"
18244"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18245"_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)\n"
18246"{\n"
18247" return _mm_set_epi32(__i3, __i2, __i1, __i0);\n"
18248"}\n"
18249"\n"
18250"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18251"/// with the specified 16-bit integral values.\n"
18252"///\n"
18253"/// \\headerfile <x86intrin.h>\n"
18254"///\n"
18255"/// This intrinsic is a utility function and does not correspond to a specific\n"
18256"/// instruction.\n"
18257"///\n"
18258"/// \\param __w0\n"
18259"/// A 16-bit integral value used to initialize bits [15:0] of the result.\n"
18260"/// \\param __w1\n"
18261"/// A 16-bit integral value used to initialize bits [31:16] of the result.\n"
18262"/// \\param __w2\n"
18263"/// A 16-bit integral value used to initialize bits [47:32] of the result.\n"
18264"/// \\param __w3\n"
18265"/// A 16-bit integral value used to initialize bits [63:48] of the result.\n"
18266"/// \\param __w4\n"
18267"/// A 16-bit integral value used to initialize bits [79:64] of the result.\n"
18268"/// \\param __w5\n"
18269"/// A 16-bit integral value used to initialize bits [95:80] of the result.\n"
18270"/// \\param __w6\n"
18271"/// A 16-bit integral value used to initialize bits [111:96] of the result.\n"
18272"/// \\param __w7\n"
18273"/// A 16-bit integral value used to initialize bits [127:112] of the result.\n"
18274"/// \\returns An initialized 128-bit integer vector.\n"
18275"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18276"_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)\n"
18277"{\n"
18278" return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);\n"
18279"}\n"
18280"\n"
18281"/// Constructs a 128-bit integer vector, initialized in reverse order\n"
18282"/// with the specified 8-bit integral values.\n"
18283"///\n"
18284"/// \\headerfile <x86intrin.h>\n"
18285"///\n"
18286"/// This intrinsic is a utility function and does not correspond to a specific\n"
18287"/// instruction.\n"
18288"///\n"
18289"/// \\param __b0\n"
18290"/// An 8-bit integral value used to initialize bits [7:0] of the result.\n"
18291"/// \\param __b1\n"
18292"/// An 8-bit integral value used to initialize bits [15:8] of the result.\n"
18293"/// \\param __b2\n"
18294"/// An 8-bit integral value used to initialize bits [23:16] of the result.\n"
18295"/// \\param __b3\n"
18296"/// An 8-bit integral value used to initialize bits [31:24] of the result.\n"
18297"/// \\param __b4\n"
18298"/// An 8-bit integral value used to initialize bits [39:32] of the result.\n"
18299"/// \\param __b5\n"
18300"/// An 8-bit integral value used to initialize bits [47:40] of the result.\n"
18301"/// \\param __b6\n"
18302"/// An 8-bit integral value used to initialize bits [55:48] of the result.\n"
18303"/// \\param __b7\n"
18304"/// An 8-bit integral value used to initialize bits [63:56] of the result.\n"
18305"/// \\param __b8\n"
18306"/// An 8-bit integral value used to initialize bits [71:64] of the result.\n"
18307"/// \\param __b9\n"
18308"/// An 8-bit integral value used to initialize bits [79:72] of the result.\n"
18309"/// \\param __b10\n"
18310"/// An 8-bit integral value used to initialize bits [87:80] of the result.\n"
18311"/// \\param __b11\n"
18312"/// An 8-bit integral value used to initialize bits [95:88] of the result.\n"
18313"/// \\param __b12\n"
18314"/// An 8-bit integral value used to initialize bits [103:96] of the result.\n"
18315"/// \\param __b13\n"
18316"/// An 8-bit integral value used to initialize bits [111:104] of the result.\n"
18317"/// \\param __b14\n"
18318"/// An 8-bit integral value used to initialize bits [119:112] of the result.\n"
18319"/// \\param __b15\n"
18320"/// An 8-bit integral value used to initialize bits [127:120] of the result.\n"
18321"/// \\returns An initialized 128-bit integer vector.\n"
18322"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18323"_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)\n"
18324"{\n"
18325" return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
18326"}\n"
18327"\n"
18328"/// Creates a 128-bit integer vector initialized to zero.\n"
18329"///\n"
18330"/// \\headerfile <x86intrin.h>\n"
18331"///\n"
18332"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
18333"///\n"
18334"/// \\returns An initialized 128-bit integer vector with all elements set to\n"
18335"/// zero.\n"
18336"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18337"_mm_setzero_si128(void)\n"
18338"{\n"
18339" return __extension__ (__m128i)(__v2di){ 0LL, 0LL };\n"
18340"}\n"
18341"\n"
18342"/// Stores a 128-bit integer vector to a memory location aligned on a\n"
18343"/// 128-bit boundary.\n"
18344"///\n"
18345"/// \\headerfile <x86intrin.h>\n"
18346"///\n"
18347"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
18348"///\n"
18349"/// \\param __p\n"
18350"/// A pointer to an aligned memory location that will receive the integer\n"
18351"/// values.\n"
18352"/// \\param __b\n"
18353"/// A 128-bit integer vector containing the values to be moved.\n"
18354"static __inline__ void __DEFAULT_FN_ATTRS\n"
18355"_mm_store_si128(__m128i *__p, __m128i __b)\n"
18356"{\n"
18357" *__p = __b;\n"
18358"}\n"
18359"\n"
18360"/// Stores a 128-bit integer vector to an unaligned memory location.\n"
18361"///\n"
18362"/// \\headerfile <x86intrin.h>\n"
18363"///\n"
18364"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
18365"///\n"
18366"/// \\param __p\n"
18367"/// A pointer to a memory location that will receive the integer values.\n"
18368"/// \\param __b\n"
18369"/// A 128-bit integer vector containing the values to be moved.\n"
18370"static __inline__ void __DEFAULT_FN_ATTRS\n"
18371"_mm_storeu_si128(__m128i *__p, __m128i __b)\n"
18372"{\n"
18373" struct __storeu_si128 {\n"
18374" __m128i __v;\n"
18375" } __attribute__((__packed__, __may_alias__));\n"
18376" ((struct __storeu_si128*)__p)->__v = __b;\n"
18377"}\n"
18378"\n"
18379"/// Moves bytes selected by the mask from the first operand to the\n"
18380"/// specified unaligned memory location. When a mask bit is 1, the\n"
18381"/// corresponding byte is written, otherwise it is not written.\n"
18382"///\n"
18383"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18384"/// used again soon). Exception and trap behavior for elements not selected\n"
18385"/// for storage to memory are implementation dependent.\n"
18386"///\n"
18387"/// \\headerfile <x86intrin.h>\n"
18388"///\n"
18389"/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>\n"
18390"/// instruction.\n"
18391"///\n"
18392"/// \\param __d\n"
18393"/// A 128-bit integer vector containing the values to be moved.\n"
18394"/// \\param __n\n"
18395"/// A 128-bit integer vector containing the mask. The most significant bit of\n"
18396"/// each byte represents the mask bits.\n"
18397"/// \\param __p\n"
18398"/// A pointer to an unaligned 128-bit memory location where the specified\n"
18399"/// values are moved.\n"
18400"static __inline__ void __DEFAULT_FN_ATTRS\n"
18401"_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)\n"
18402"{\n"
18403" __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);\n"
18404"}\n"
18405"\n"
18406"/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to\n"
18407"/// a memory location.\n"
18408"///\n"
18409"/// \\headerfile <x86intrin.h>\n"
18410"///\n"
18411"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
18412"///\n"
18413"/// \\param __p\n"
18414"/// A pointer to a 64-bit memory location that will receive the lower 64 bits\n"
18415"/// of the integer vector parameter.\n"
18416"/// \\param __a\n"
18417"/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the\n"
18418"/// value to be stored.\n"
18419"static __inline__ void __DEFAULT_FN_ATTRS\n"
18420"_mm_storel_epi64(__m128i *__p, __m128i __a)\n"
18421"{\n"
18422" struct __mm_storel_epi64_struct {\n"
18423" long long __u;\n"
18424" } __attribute__((__packed__, __may_alias__));\n"
18425" ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];\n"
18426"}\n"
18427"\n"
18428"/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit\n"
18429"/// aligned memory location.\n"
18430"///\n"
18431"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18432"/// used again soon).\n"
18433"///\n"
18434"/// \\headerfile <x86intrin.h>\n"
18435"///\n"
18436"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18437"///\n"
18438"/// \\param __p\n"
18439"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18440"/// \\param __a\n"
18441"/// A vector of [2 x double] containing the 64-bit values to be stored.\n"
18442"static __inline__ void __DEFAULT_FN_ATTRS\n"
18443"_mm_stream_pd(double *__p, __m128d __a)\n"
18444"{\n"
18445" __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);\n"
18446"}\n"
18447"\n"
18448"/// Stores a 128-bit integer vector to a 128-bit aligned memory location.\n"
18449"///\n"
18450"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18451"/// used again soon).\n"
18452"///\n"
18453"/// \\headerfile <x86intrin.h>\n"
18454"///\n"
18455"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
18456"///\n"
18457"/// \\param __p\n"
18458"/// A pointer to the 128-bit aligned memory location used to store the value.\n"
18459"/// \\param __a\n"
18460"/// A 128-bit integer vector containing the values to be stored.\n"
18461"static __inline__ void __DEFAULT_FN_ATTRS\n"
18462"_mm_stream_si128(__m128i *__p, __m128i __a)\n"
18463"{\n"
18464" __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);\n"
18465"}\n"
18466"\n"
18467"/// Stores a 32-bit integer value in the specified memory location.\n"
18468"///\n"
18469"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18470"/// used again soon).\n"
18471"///\n"
18472"/// \\headerfile <x86intrin.h>\n"
18473"///\n"
18474"/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.\n"
18475"///\n"
18476"/// \\param __p\n"
18477"/// A pointer to the 32-bit memory location used to store the value.\n"
18478"/// \\param __a\n"
18479"/// A 32-bit integer containing the value to be stored.\n"
18480"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18481"_mm_stream_si32(int *__p, int __a)\n"
18482"{\n"
18483" __builtin_ia32_movnti(__p, __a);\n"
18484"}\n"
18485"\n"
18486"#ifdef __x86_64__\n"
18487"/// Stores a 64-bit integer value in the specified memory location.\n"
18488"///\n"
18489"/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n"
18490"/// used again soon).\n"
18491"///\n"
18492"/// \\headerfile <x86intrin.h>\n"
18493"///\n"
18494"/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.\n"
18495"///\n"
18496"/// \\param __p\n"
18497"/// A pointer to the 64-bit memory location used to store the value.\n"
18498"/// \\param __a\n"
18499"/// A 64-bit integer containing the value to be stored.\n"
18500"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n"
18501"_mm_stream_si64(long long *__p, long long __a)\n"
18502"{\n"
18503" __builtin_ia32_movnti64(__p, __a);\n"
18504"}\n"
18505"#endif\n"
18506"\n"
18507"#if defined(__cplusplus)\n"
18508"extern \"C\" {\n"
18509"#endif\n"
18510"\n"
18511"/// The cache line containing \\a __p is flushed and invalidated from all\n"
18512"/// caches in the coherency domain.\n"
18513"///\n"
18514"/// \\headerfile <x86intrin.h>\n"
18515"///\n"
18516"/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.\n"
18517"///\n"
18518"/// \\param __p\n"
18519"/// A pointer to the memory location used to identify the cache line to be\n"
18520"/// flushed.\n"
18521"void _mm_clflush(void const * __p);\n"
18522"\n"
18523"/// Forces strong memory ordering (serialization) between load\n"
18524"/// instructions preceding this instruction and load instructions following\n"
18525"/// this instruction, ensuring the system completes all previous loads before\n"
18526"/// executing subsequent loads.\n"
18527"///\n"
18528"/// \\headerfile <x86intrin.h>\n"
18529"///\n"
18530"/// This intrinsic corresponds to the <c> LFENCE </c> instruction.\n"
18531"///\n"
18532"void _mm_lfence(void);\n"
18533"\n"
18534"/// Forces strong memory ordering (serialization) between load and store\n"
18535"/// instructions preceding this instruction and load and store instructions\n"
18536"/// following this instruction, ensuring that the system completes all\n"
18537"/// previous memory accesses before executing subsequent memory accesses.\n"
18538"///\n"
18539"/// \\headerfile <x86intrin.h>\n"
18540"///\n"
18541"/// This intrinsic corresponds to the <c> MFENCE </c> instruction.\n"
18542"///\n"
18543"void _mm_mfence(void);\n"
18544"\n"
18545"#if defined(__cplusplus)\n"
18546"} // extern \"C\"\n"
18547"#endif\n"
18548"\n"
18549"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18550"/// operands into 8-bit signed integers, and packs the results into the\n"
18551"/// destination. Positive values greater than 0x7F are saturated to 0x7F.\n"
18552"/// Negative values less than 0x80 are saturated to 0x80.\n"
18553"///\n"
18554"/// \\headerfile <x86intrin.h>\n"
18555"///\n"
18556"/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.\n"
18557"///\n"
18558"/// \\param __a\n"
18559"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18560"/// a signed integer and is converted to a 8-bit signed integer with\n"
18561"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18562"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18563"/// written to the lower 64 bits of the result.\n"
18564"/// \\param __b\n"
18565"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18566"/// a signed integer and is converted to a 8-bit signed integer with\n"
18567"/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n"
18568"/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n"
18569"/// written to the higher 64 bits of the result.\n"
18570"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18571"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18572"_mm_packs_epi16(__m128i __a, __m128i __b)\n"
18573"{\n"
18574" return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);\n"
18575"}\n"
18576"\n"
18577"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
18578"/// operands into 16-bit signed integers, and packs the results into the\n"
18579"/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
18580"/// Negative values less than 0x8000 are saturated to 0x8000.\n"
18581"///\n"
18582"/// \\headerfile <x86intrin.h>\n"
18583"///\n"
18584"/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.\n"
18585"///\n"
18586"/// \\param __a\n"
18587"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18588"/// a signed integer and is converted to a 16-bit signed integer with\n"
18589"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18590"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18591"/// are written to the lower 64 bits of the result.\n"
18592"/// \\param __b\n"
18593"/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n"
18594"/// a signed integer and is converted to a 16-bit signed integer with\n"
18595"/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n"
18596"/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n"
18597"/// are written to the higher 64 bits of the result.\n"
18598"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
18599"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18600"_mm_packs_epi32(__m128i __a, __m128i __b)\n"
18601"{\n"
18602" return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);\n"
18603"}\n"
18604"\n"
18605"/// Converts 16-bit signed integers from both 128-bit integer vector\n"
18606"/// operands into 8-bit unsigned integers, and packs the results into the\n"
18607"/// destination. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18608"/// than 0x00 are saturated to 0x00.\n"
18609"///\n"
18610"/// \\headerfile <x86intrin.h>\n"
18611"///\n"
18612"/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.\n"
18613"///\n"
18614"/// \\param __a\n"
18615"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18616"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18617"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18618"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18619"/// written to the lower 64 bits of the result.\n"
18620"/// \\param __b\n"
18621"/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n"
18622"/// a signed integer and is converted to an 8-bit unsigned integer with\n"
18623"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
18624"/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n"
18625"/// written to the higher 64 bits of the result.\n"
18626"/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n"
18627"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18628"_mm_packus_epi16(__m128i __a, __m128i __b)\n"
18629"{\n"
18630" return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);\n"
18631"}\n"
18632"\n"
18633"/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using\n"
18634"/// the immediate-value parameter as a selector.\n"
18635"///\n"
18636"/// \\headerfile <x86intrin.h>\n"
18637"///\n"
18638"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
18639"///\n"
18640"/// \\param __a\n"
18641"/// A 128-bit integer vector.\n"
18642"/// \\param __imm\n"
18643"/// An immediate value. Bits [2:0] selects values from \\a __a to be assigned\n"
18644"/// to bits[15:0] of the result. \\n\n"
18645"/// 000: assign values from bits [15:0] of \\a __a. \\n\n"
18646"/// 001: assign values from bits [31:16] of \\a __a. \\n\n"
18647"/// 010: assign values from bits [47:32] of \\a __a. \\n\n"
18648"/// 011: assign values from bits [63:48] of \\a __a. \\n\n"
18649"/// 100: assign values from bits [79:64] of \\a __a. \\n\n"
18650"/// 101: assign values from bits [95:80] of \\a __a. \\n\n"
18651"/// 110: assign values from bits [111:96] of \\a __a. \\n\n"
18652"/// 111: assign values from bits [127:112] of \\a __a.\n"
18653"/// \\returns An integer, whose lower 16 bits are selected from the 128-bit\n"
18654"/// integer vector parameter and the remaining bits are assigned zeros.\n"
18655"#define _mm_extract_epi16(a, imm) \\\n"
18656" (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \\\n"
18657" (int)(imm))\n"
18658"\n"
18659"/// Constructs a 128-bit integer vector by first making a copy of the\n"
18660"/// 128-bit integer vector parameter, and then inserting the lower 16 bits\n"
18661"/// of an integer parameter into an offset specified by the immediate-value\n"
18662"/// parameter.\n"
18663"///\n"
18664"/// \\headerfile <x86intrin.h>\n"
18665"///\n"
18666"/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.\n"
18667"///\n"
18668"/// \\param __a\n"
18669"/// A 128-bit integer vector of [8 x i16]. This vector is copied to the\n"
18670"/// result and then one of the eight elements in the result is replaced by\n"
18671"/// the lower 16 bits of \\a __b.\n"
18672"/// \\param __b\n"
18673"/// An integer. The lower 16 bits of this parameter are written to the\n"
18674"/// result beginning at an offset specified by \\a __imm.\n"
18675"/// \\param __imm\n"
18676"/// An immediate value specifying the bit offset in the result at which the\n"
18677"/// lower 16 bits of \\a __b are written.\n"
18678"/// \\returns A 128-bit integer vector containing the constructed values.\n"
18679"#define _mm_insert_epi16(a, b, imm) \\\n"
18680" (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \\\n"
18681" (int)(imm))\n"
18682"\n"
18683"/// Copies the values of the most significant bits from each 8-bit\n"
18684"/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask\n"
18685"/// value, zero-extends the value, and writes it to the destination.\n"
18686"///\n"
18687"/// \\headerfile <x86intrin.h>\n"
18688"///\n"
18689"/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.\n"
18690"///\n"
18691"/// \\param __a\n"
18692"/// A 128-bit integer vector containing the values with bits to be extracted.\n"
18693"/// \\returns The most significant bits from each 8-bit element in \\a __a,\n"
18694"/// written to bits [15:0]. The other bits are assigned zeros.\n"
18695"static __inline__ int __DEFAULT_FN_ATTRS\n"
18696"_mm_movemask_epi8(__m128i __a)\n"
18697"{\n"
18698" return __builtin_ia32_pmovmskb128((__v16qi)__a);\n"
18699"}\n"
18700"\n"
18701"/// Constructs a 128-bit integer vector by shuffling four 32-bit\n"
18702"/// elements of a 128-bit integer vector parameter, using the immediate-value\n"
18703"/// parameter as a specifier.\n"
18704"///\n"
18705"/// \\headerfile <x86intrin.h>\n"
18706"///\n"
18707"/// \\code\n"
18708"/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);\n"
18709"/// \\endcode\n"
18710"///\n"
18711"/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.\n"
18712"///\n"
18713"/// \\param a\n"
18714"/// A 128-bit integer vector containing the values to be copied.\n"
18715"/// \\param imm\n"
18716"/// An immediate value containing an 8-bit value specifying which elements to\n"
18717"/// copy from a. The destinations within the 128-bit destination are assigned\n"
18718"/// values as follows: \\n\n"
18719"/// Bits [1:0] are used to assign values to bits [31:0] of the result. \\n\n"
18720"/// Bits [3:2] are used to assign values to bits [63:32] of the result. \\n\n"
18721"/// Bits [5:4] are used to assign values to bits [95:64] of the result. \\n\n"
18722"/// Bits [7:6] are used to assign values to bits [127:96] of the result. \\n\n"
18723"/// Bit value assignments: \\n\n"
18724"/// 00: assign values from bits [31:0] of \\a a. \\n\n"
18725"/// 01: assign values from bits [63:32] of \\a a. \\n\n"
18726"/// 10: assign values from bits [95:64] of \\a a. \\n\n"
18727"/// 11: assign values from bits [127:96] of \\a a.\n"
18728"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18729"#define _mm_shuffle_epi32(a, imm) \\\n"
18730" (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))\n"
18731"\n"
18732"/// Constructs a 128-bit integer vector by shuffling four lower 16-bit\n"
18733"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18734"/// value parameter as a specifier.\n"
18735"///\n"
18736"/// \\headerfile <x86intrin.h>\n"
18737"///\n"
18738"/// \\code\n"
18739"/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);\n"
18740"/// \\endcode\n"
18741"///\n"
18742"/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.\n"
18743"///\n"
18744"/// \\param a\n"
18745"/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits\n"
18746"/// [127:64] of the result.\n"
18747"/// \\param imm\n"
18748"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18749"/// Bits[1:0] are used to assign values to bits [15:0] of the result. \\n\n"
18750"/// Bits[3:2] are used to assign values to bits [31:16] of the result. \\n\n"
18751"/// Bits[5:4] are used to assign values to bits [47:32] of the result. \\n\n"
18752"/// Bits[7:6] are used to assign values to bits [63:48] of the result. \\n\n"
18753"/// Bit value assignments: \\n\n"
18754"/// 00: assign values from bits [15:0] of \\a a. \\n\n"
18755"/// 01: assign values from bits [31:16] of \\a a. \\n\n"
18756"/// 10: assign values from bits [47:32] of \\a a. \\n\n"
18757"/// 11: assign values from bits [63:48] of \\a a. \\n\n"
18758"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18759"#define _mm_shufflelo_epi16(a, imm) \\\n"
18760" (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))\n"
18761"\n"
18762"/// Constructs a 128-bit integer vector by shuffling four upper 16-bit\n"
18763"/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n"
18764"/// value parameter as a specifier.\n"
18765"///\n"
18766"/// \\headerfile <x86intrin.h>\n"
18767"///\n"
18768"/// \\code\n"
18769"/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);\n"
18770"/// \\endcode\n"
18771"///\n"
18772"/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.\n"
18773"///\n"
18774"/// \\param a\n"
18775"/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits\n"
18776"/// [63:0] of the result.\n"
18777"/// \\param imm\n"
18778"/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n"
18779"/// Bits[1:0] are used to assign values to bits [79:64] of the result. \\n\n"
18780"/// Bits[3:2] are used to assign values to bits [95:80] of the result. \\n\n"
18781"/// Bits[5:4] are used to assign values to bits [111:96] of the result. \\n\n"
18782"/// Bits[7:6] are used to assign values to bits [127:112] of the result. \\n\n"
18783"/// Bit value assignments: \\n\n"
18784"/// 00: assign values from bits [79:64] of \\a a. \\n\n"
18785"/// 01: assign values from bits [95:80] of \\a a. \\n\n"
18786"/// 10: assign values from bits [111:96] of \\a a. \\n\n"
18787"/// 11: assign values from bits [127:112] of \\a a. \\n\n"
18788"/// \\returns A 128-bit integer vector containing the shuffled values.\n"
18789"#define _mm_shufflehi_epi16(a, imm) \\\n"
18790" (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))\n"
18791"\n"
18792"/// Unpacks the high-order (index 8-15) values from two 128-bit vectors\n"
18793"/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
18794"///\n"
18795"/// \\headerfile <x86intrin.h>\n"
18796"///\n"
18797"/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>\n"
18798"/// instruction.\n"
18799"///\n"
18800"/// \\param __a\n"
18801"/// A 128-bit vector of [16 x i8].\n"
18802"/// Bits [71:64] are written to bits [7:0] of the result. \\n\n"
18803"/// Bits [79:72] are written to bits [23:16] of the result. \\n\n"
18804"/// Bits [87:80] are written to bits [39:32] of the result. \\n\n"
18805"/// Bits [95:88] are written to bits [55:48] of the result. \\n\n"
18806"/// Bits [103:96] are written to bits [71:64] of the result. \\n\n"
18807"/// Bits [111:104] are written to bits [87:80] of the result. \\n\n"
18808"/// Bits [119:112] are written to bits [103:96] of the result. \\n\n"
18809"/// Bits [127:120] are written to bits [119:112] of the result.\n"
18810"/// \\param __b\n"
18811"/// A 128-bit vector of [16 x i8]. \\n\n"
18812"/// Bits [71:64] are written to bits [15:8] of the result. \\n\n"
18813"/// Bits [79:72] are written to bits [31:24] of the result. \\n\n"
18814"/// Bits [87:80] are written to bits [47:40] of the result. \\n\n"
18815"/// Bits [95:88] are written to bits [63:56] of the result. \\n\n"
18816"/// Bits [103:96] are written to bits [79:72] of the result. \\n\n"
18817"/// Bits [111:104] are written to bits [95:88] of the result. \\n\n"
18818"/// Bits [119:112] are written to bits [111:104] of the result. \\n\n"
18819"/// Bits [127:120] are written to bits [127:120] of the result.\n"
18820"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
18821"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18822"_mm_unpackhi_epi8(__m128i __a, __m128i __b)\n"
18823"{\n"
18824" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n"
18825"}\n"
18826"\n"
18827"/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of\n"
18828"/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].\n"
18829"///\n"
18830"/// \\headerfile <x86intrin.h>\n"
18831"///\n"
18832"/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>\n"
18833"/// instruction.\n"
18834"///\n"
18835"/// \\param __a\n"
18836"/// A 128-bit vector of [8 x i16].\n"
18837"/// Bits [79:64] are written to bits [15:0] of the result. \\n\n"
18838"/// Bits [95:80] are written to bits [47:32] of the result. \\n\n"
18839"/// Bits [111:96] are written to bits [79:64] of the result. \\n\n"
18840"/// Bits [127:112] are written to bits [111:96] of the result.\n"
18841"/// \\param __b\n"
18842"/// A 128-bit vector of [8 x i16].\n"
18843"/// Bits [79:64] are written to bits [31:16] of the result. \\n\n"
18844"/// Bits [95:80] are written to bits [63:48] of the result. \\n\n"
18845"/// Bits [111:96] are written to bits [95:80] of the result. \\n\n"
18846"/// Bits [127:112] are written to bits [127:112] of the result.\n"
18847"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
18848"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18849"_mm_unpackhi_epi16(__m128i __a, __m128i __b)\n"
18850"{\n"
18851" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);\n"
18852"}\n"
18853"\n"
18854"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
18855"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
18856"///\n"
18857"/// \\headerfile <x86intrin.h>\n"
18858"///\n"
18859"/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>\n"
18860"/// instruction.\n"
18861"///\n"
18862"/// \\param __a\n"
18863"/// A 128-bit vector of [4 x i32]. \\n\n"
18864"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
18865"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
18866"/// \\param __b\n"
18867"/// A 128-bit vector of [4 x i32]. \\n\n"
18868"/// Bits [95:64] are written to bits [64:32] of the destination. \\n\n"
18869"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
18870"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
18871"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18872"_mm_unpackhi_epi32(__m128i __a, __m128i __b)\n"
18873"{\n"
18874" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);\n"
18875"}\n"
18876"\n"
18877"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
18878"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
18879"///\n"
18880"/// \\headerfile <x86intrin.h>\n"
18881"///\n"
18882"/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>\n"
18883"/// instruction.\n"
18884"///\n"
18885"/// \\param __a\n"
18886"/// A 128-bit vector of [2 x i64]. \\n\n"
18887"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
18888"/// \\param __b\n"
18889"/// A 128-bit vector of [2 x i64]. \\n\n"
18890"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
18891"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
18892"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18893"_mm_unpackhi_epi64(__m128i __a, __m128i __b)\n"
18894"{\n"
18895" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);\n"
18896"}\n"
18897"\n"
18898"/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of\n"
18899"/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n"
18900"///\n"
18901"/// \\headerfile <x86intrin.h>\n"
18902"///\n"
18903"/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>\n"
18904"/// instruction.\n"
18905"///\n"
18906"/// \\param __a\n"
18907"/// A 128-bit vector of [16 x i8]. \\n\n"
18908"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
18909"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
18910"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
18911"/// Bits [31:24] are written to bits [55:48] of the result. \\n\n"
18912"/// Bits [39:32] are written to bits [71:64] of the result. \\n\n"
18913"/// Bits [47:40] are written to bits [87:80] of the result. \\n\n"
18914"/// Bits [55:48] are written to bits [103:96] of the result. \\n\n"
18915"/// Bits [63:56] are written to bits [119:112] of the result.\n"
18916"/// \\param __b\n"
18917"/// A 128-bit vector of [16 x i8].\n"
18918"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
18919"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
18920"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
18921"/// Bits [31:24] are written to bits [63:56] of the result. \\n\n"
18922"/// Bits [39:32] are written to bits [79:72] of the result. \\n\n"
18923"/// Bits [47:40] are written to bits [95:88] of the result. \\n\n"
18924"/// Bits [55:48] are written to bits [111:104] of the result. \\n\n"
18925"/// Bits [63:56] are written to bits [127:120] of the result.\n"
18926"/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n"
18927"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18928"_mm_unpacklo_epi8(__m128i __a, __m128i __b)\n"
18929"{\n"
18930" return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);\n"
18931"}\n"
18932"\n"
18933"/// Unpacks the low-order (index 0-3) values from each of the two 128-bit\n"
18934"/// vectors of [8 x i16] and interleaves them into a 128-bit vector of\n"
18935"/// [8 x i16].\n"
18936"///\n"
18937"/// \\headerfile <x86intrin.h>\n"
18938"///\n"
18939"/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>\n"
18940"/// instruction.\n"
18941"///\n"
18942"/// \\param __a\n"
18943"/// A 128-bit vector of [8 x i16].\n"
18944"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
18945"/// Bits [31:16] are written to bits [47:32] of the result. \\n\n"
18946"/// Bits [47:32] are written to bits [79:64] of the result. \\n\n"
18947"/// Bits [63:48] are written to bits [111:96] of the result.\n"
18948"/// \\param __b\n"
18949"/// A 128-bit vector of [8 x i16].\n"
18950"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
18951"/// Bits [31:16] are written to bits [63:48] of the result. \\n\n"
18952"/// Bits [47:32] are written to bits [95:80] of the result. \\n\n"
18953"/// Bits [63:48] are written to bits [127:112] of the result.\n"
18954"/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n"
18955"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18956"_mm_unpacklo_epi16(__m128i __a, __m128i __b)\n"
18957"{\n"
18958" return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);\n"
18959"}\n"
18960"\n"
18961"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
18962"/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n"
18963"///\n"
18964"/// \\headerfile <x86intrin.h>\n"
18965"///\n"
18966"/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>\n"
18967"/// instruction.\n"
18968"///\n"
18969"/// \\param __a\n"
18970"/// A 128-bit vector of [4 x i32]. \\n\n"
18971"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
18972"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
18973"/// \\param __b\n"
18974"/// A 128-bit vector of [4 x i32]. \\n\n"
18975"/// Bits [31:0] are written to bits [64:32] of the destination. \\n\n"
18976"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
18977"/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n"
18978"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
18979"_mm_unpacklo_epi32(__m128i __a, __m128i __b)\n"
18980"{\n"
18981" return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);\n"
18982"}\n"
18983"\n"
18984"/// Unpacks the low-order 64-bit elements from two 128-bit vectors of\n"
18985"/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n"
18986"///\n"
18987"/// \\headerfile <x86intrin.h>\n"
18988"///\n"
18989"/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>\n"
18990"/// instruction.\n"
18991"///\n"
18992"/// \\param __a\n"
18993"/// A 128-bit vector of [2 x i64]. \\n\n"
18994"/// Bits [63:0] are written to bits [63:0] of the destination. \\n\n"
18995"/// \\param __b\n"
18996"/// A 128-bit vector of [2 x i64]. \\n\n"
18997"/// Bits [63:0] are written to bits [127:64] of the destination. \\n\n"
18998"/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n"
18999"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19000"_mm_unpacklo_epi64(__m128i __a, __m128i __b)\n"
19001"{\n"
19002" return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);\n"
19003"}\n"
19004"\n"
19005"/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit\n"
19006"/// integer.\n"
19007"///\n"
19008"/// \\headerfile <x86intrin.h>\n"
19009"///\n"
19010"/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.\n"
19011"///\n"
19012"/// \\param __a\n"
19013"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19014"/// destination.\n"
19015"/// \\returns A 64-bit integer containing the lower 64 bits of the parameter.\n"
19016"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
19017"_mm_movepi64_pi64(__m128i __a)\n"
19018"{\n"
19019" return (__m64)__a[0];\n"
19020"}\n"
19021"\n"
19022"/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the\n"
19023"/// upper bits.\n"
19024"///\n"
19025"/// \\headerfile <x86intrin.h>\n"
19026"///\n"
19027"/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.\n"
19028"///\n"
19029"/// \\param __a\n"
19030"/// A 64-bit value.\n"
19031"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19032"/// the operand. The upper 64 bits are assigned zeros.\n"
19033"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19034"_mm_movpi64_epi64(__m64 __a)\n"
19035"{\n"
19036" return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };\n"
19037"}\n"
19038"\n"
19039"/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit\n"
19040"/// integer vector, zeroing the upper bits.\n"
19041"///\n"
19042"/// \\headerfile <x86intrin.h>\n"
19043"///\n"
19044"/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n"
19045"///\n"
19046"/// \\param __a\n"
19047"/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n"
19048"/// destination.\n"
19049"/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n"
19050"/// the operand. The upper 64 bits are assigned zeros.\n"
19051"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19052"_mm_move_epi64(__m128i __a)\n"
19053"{\n"
19054" return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);\n"
19055"}\n"
19056"\n"
19057"/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n"
19058"/// [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19059"/// double].\n"
19060"///\n"
19061"/// \\headerfile <x86intrin.h>\n"
19062"///\n"
19063"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
19064"///\n"
19065"/// \\param __a\n"
19066"/// A 128-bit vector of [2 x double]. \\n\n"
19067"/// Bits [127:64] are written to bits [63:0] of the destination.\n"
19068"/// \\param __b\n"
19069"/// A 128-bit vector of [2 x double]. \\n\n"
19070"/// Bits [127:64] are written to bits [127:64] of the destination.\n"
19071"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19072"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19073"_mm_unpackhi_pd(__m128d __a, __m128d __b)\n"
19074"{\n"
19075" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);\n"
19076"}\n"
19077"\n"
19078"/// Unpacks the low-order 64-bit elements from two 128-bit vectors\n"
19079"/// of [2 x double] and interleaves them into a 128-bit vector of [2 x\n"
19080"/// double].\n"
19081"///\n"
19082"/// \\headerfile <x86intrin.h>\n"
19083"///\n"
19084"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
19085"///\n"
19086"/// \\param __a\n"
19087"/// A 128-bit vector of [2 x double]. \\n\n"
19088"/// Bits [63:0] are written to bits [63:0] of the destination.\n"
19089"/// \\param __b\n"
19090"/// A 128-bit vector of [2 x double]. \\n\n"
19091"/// Bits [63:0] are written to bits [127:64] of the destination.\n"
19092"/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n"
19093"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19094"_mm_unpacklo_pd(__m128d __a, __m128d __b)\n"
19095"{\n"
19096" return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);\n"
19097"}\n"
19098"\n"
19099"/// Extracts the sign bits of the double-precision values in the 128-bit\n"
19100"/// vector of [2 x double], zero-extends the value, and writes it to the\n"
19101"/// low-order bits of the destination.\n"
19102"///\n"
19103"/// \\headerfile <x86intrin.h>\n"
19104"///\n"
19105"/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.\n"
19106"///\n"
19107"/// \\param __a\n"
19108"/// A 128-bit vector of [2 x double] containing the values with sign bits to\n"
19109"/// be extracted.\n"
19110"/// \\returns The sign bits from each of the double-precision elements in \\a __a,\n"
19111"/// written to bits [1:0]. The remaining bits are assigned values of zero.\n"
19112"static __inline__ int __DEFAULT_FN_ATTRS\n"
19113"_mm_movemask_pd(__m128d __a)\n"
19114"{\n"
19115" return __builtin_ia32_movmskpd((__v2df)__a);\n"
19116"}\n"
19117"\n"
19118"\n"
19119"/// Constructs a 128-bit floating-point vector of [2 x double] from two\n"
19120"/// 128-bit vector parameters of [2 x double], using the immediate-value\n"
19121"/// parameter as a specifier.\n"
19122"///\n"
19123"/// \\headerfile <x86intrin.h>\n"
19124"///\n"
19125"/// \\code\n"
19126"/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);\n"
19127"/// \\endcode\n"
19128"///\n"
19129"/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.\n"
19130"///\n"
19131"/// \\param a\n"
19132"/// A 128-bit vector of [2 x double].\n"
19133"/// \\param b\n"
19134"/// A 128-bit vector of [2 x double].\n"
19135"/// \\param i\n"
19136"/// An 8-bit immediate value. The least significant two bits specify which\n"
19137"/// elements to copy from \\a a and \\a b: \\n\n"
19138"/// Bit[0] = 0: lower element of \\a a copied to lower element of result. \\n\n"
19139"/// Bit[0] = 1: upper element of \\a a copied to lower element of result. \\n\n"
19140"/// Bit[1] = 0: lower element of \\a b copied to upper element of result. \\n\n"
19141"/// Bit[1] = 1: upper element of \\a b copied to upper element of result. \\n\n"
19142"/// \\returns A 128-bit vector of [2 x double] containing the shuffled values.\n"
19143"#define _mm_shuffle_pd(a, b, i) \\\n"
19144" (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \\\n"
19145" (int)(i))\n"
19146"\n"
19147"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19148"/// floating-point vector of [4 x float].\n"
19149"///\n"
19150"/// \\headerfile <x86intrin.h>\n"
19151"///\n"
19152"/// This intrinsic has no corresponding instruction.\n"
19153"///\n"
19154"/// \\param __a\n"
19155"/// A 128-bit floating-point vector of [2 x double].\n"
19156"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19157"/// bitwise pattern as the parameter.\n"
19158"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19159"_mm_castpd_ps(__m128d __a)\n"
19160"{\n"
19161" return (__m128)__a;\n"
19162"}\n"
19163"\n"
19164"/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n"
19165"/// integer vector.\n"
19166"///\n"
19167"/// \\headerfile <x86intrin.h>\n"
19168"///\n"
19169"/// This intrinsic has no corresponding instruction.\n"
19170"///\n"
19171"/// \\param __a\n"
19172"/// A 128-bit floating-point vector of [2 x double].\n"
19173"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19174"/// parameter.\n"
19175"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19176"_mm_castpd_si128(__m128d __a)\n"
19177"{\n"
19178" return (__m128i)__a;\n"
19179"}\n"
19180"\n"
19181"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19182"/// floating-point vector of [2 x double].\n"
19183"///\n"
19184"/// \\headerfile <x86intrin.h>\n"
19185"///\n"
19186"/// This intrinsic has no corresponding instruction.\n"
19187"///\n"
19188"/// \\param __a\n"
19189"/// A 128-bit floating-point vector of [4 x float].\n"
19190"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19191"/// bitwise pattern as the parameter.\n"
19192"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19193"_mm_castps_pd(__m128 __a)\n"
19194"{\n"
19195" return (__m128d)__a;\n"
19196"}\n"
19197"\n"
19198"/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n"
19199"/// integer vector.\n"
19200"///\n"
19201"/// \\headerfile <x86intrin.h>\n"
19202"///\n"
19203"/// This intrinsic has no corresponding instruction.\n"
19204"///\n"
19205"/// \\param __a\n"
19206"/// A 128-bit floating-point vector of [4 x float].\n"
19207"/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n"
19208"/// parameter.\n"
19209"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
19210"_mm_castps_si128(__m128 __a)\n"
19211"{\n"
19212" return (__m128i)__a;\n"
19213"}\n"
19214"\n"
19215"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19216"/// of [4 x float].\n"
19217"///\n"
19218"/// \\headerfile <x86intrin.h>\n"
19219"///\n"
19220"/// This intrinsic has no corresponding instruction.\n"
19221"///\n"
19222"/// \\param __a\n"
19223"/// A 128-bit integer vector.\n"
19224"/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n"
19225"/// bitwise pattern as the parameter.\n"
19226"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
19227"_mm_castsi128_ps(__m128i __a)\n"
19228"{\n"
19229" return (__m128)__a;\n"
19230"}\n"
19231"\n"
19232"/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n"
19233"/// of [2 x double].\n"
19234"///\n"
19235"/// \\headerfile <x86intrin.h>\n"
19236"///\n"
19237"/// This intrinsic has no corresponding instruction.\n"
19238"///\n"
19239"/// \\param __a\n"
19240"/// A 128-bit integer vector.\n"
19241"/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n"
19242"/// bitwise pattern as the parameter.\n"
19243"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
19244"_mm_castsi128_pd(__m128i __a)\n"
19245"{\n"
19246" return (__m128d)__a;\n"
19247"}\n"
19248"\n"
19249"#if defined(__cplusplus)\n"
19250"extern \"C\" {\n"
19251"#endif\n"
19252"\n"
19253"/// Indicates that a spin loop is being executed for the purposes of\n"
19254"/// optimizing power consumption during the loop.\n"
19255"///\n"
19256"/// \\headerfile <x86intrin.h>\n"
19257"///\n"
19258"/// This intrinsic corresponds to the <c> PAUSE </c> instruction.\n"
19259"///\n"
19260"void _mm_pause(void);\n"
19261"\n"
19262"#if defined(__cplusplus)\n"
19263"} // extern \"C\"\n"
19264"#endif\n"
19265"#undef __DEFAULT_FN_ATTRS\n"
19266"#undef __DEFAULT_FN_ATTRS_MMX\n"
19267"\n"
19268"#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n"
19269"\n"
19270"#define _MM_DENORMALS_ZERO_ON (0x0040)\n"
19271"#define _MM_DENORMALS_ZERO_OFF (0x0000)\n"
19272"\n"
19273"#define _MM_DENORMALS_ZERO_MASK (0x0040)\n"
19274"\n"
19275"#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)\n"
19276"#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))\n"
19277"\n"
19278"#endif /* __EMMINTRIN_H */\n"
19279"" } ,
19280 { "/builtins/f16cintrin.h" , "/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===\n"
19281" *\n"
19282" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19283" * of this software and associated documentation files (the \"Software\"), to deal\n"
19284" * in the Software without restriction, including without limitation the rights\n"
19285" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19286" * copies of the Software, and to permit persons to whom the Software is\n"
19287" * furnished to do so, subject to the following conditions:\n"
19288" *\n"
19289" * The above copyright notice and this permission notice shall be included in\n"
19290" * all copies or substantial portions of the Software.\n"
19291" *\n"
19292" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19293" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19294" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19295" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19296" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19297" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19298" * THE SOFTWARE.\n"
19299" *\n"
19300" *===-----------------------------------------------------------------------===\n"
19301" */\n"
19302"\n"
19303"#if !defined __IMMINTRIN_H\n"
19304"#error \"Never use <f16cintrin.h> directly; include <immintrin.h> instead.\"\n"
19305"#endif\n"
19306"\n"
19307"#ifndef __F16CINTRIN_H\n"
19308"#define __F16CINTRIN_H\n"
19309"\n"
19310"/* Define the default attributes for the functions in this file. */\n"
19311"#define __DEFAULT_FN_ATTRS128 \\\n"
19312" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(128)))\n"
19313"#define __DEFAULT_FN_ATTRS256 \\\n"
19314" __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(256)))\n"
19315"\n"
19316"/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,\n"
19317" * but that's because icc can emulate these without f16c using a library call.\n"
19318" * Since we don't do that let's leave these in f16cintrin.h.\n"
19319" */\n"
19320"\n"
19321"/// Converts a 16-bit half-precision float value into a 32-bit float\n"
19322"/// value.\n"
19323"///\n"
19324"/// \\headerfile <x86intrin.h>\n"
19325"///\n"
19326"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19327"///\n"
19328"/// \\param __a\n"
19329"/// A 16-bit half-precision float value.\n"
19330"/// \\returns The converted 32-bit float value.\n"
19331"static __inline float __DEFAULT_FN_ATTRS128\n"
19332"_cvtsh_ss(unsigned short __a)\n"
19333"{\n"
19334" __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};\n"
19335" __v4sf r = __builtin_ia32_vcvtph2ps(v);\n"
19336" return r[0];\n"
19337"}\n"
19338"\n"
19339"/// Converts a 32-bit single-precision float value to a 16-bit\n"
19340"/// half-precision float value.\n"
19341"///\n"
19342"/// \\headerfile <x86intrin.h>\n"
19343"///\n"
19344"/// \\code\n"
19345"/// unsigned short _cvtss_sh(float a, const int imm);\n"
19346"/// \\endcode\n"
19347"///\n"
19348"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19349"///\n"
19350"/// \\param a\n"
19351"/// A 32-bit single-precision float value to be converted to a 16-bit\n"
19352"/// half-precision float value.\n"
19353"/// \\param imm\n"
19354"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19355"/// 000: Nearest \\n\n"
19356"/// 001: Down \\n\n"
19357"/// 010: Up \\n\n"
19358"/// 011: Truncate \\n\n"
19359"/// 1XX: Use MXCSR.RC for rounding\n"
19360"/// \\returns The converted 16-bit half-precision float value.\n"
19361"#define _cvtss_sh(a, imm) \\\n"
19362" (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \\\n"
19363" (imm)))[0])\n"
19364"\n"
19365"/// Converts a 128-bit vector containing 32-bit float values into a\n"
19366"/// 128-bit vector containing 16-bit half-precision float values.\n"
19367"///\n"
19368"/// \\headerfile <x86intrin.h>\n"
19369"///\n"
19370"/// \\code\n"
19371"/// __m128i _mm_cvtps_ph(__m128 a, const int imm);\n"
19372"/// \\endcode\n"
19373"///\n"
19374"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19375"///\n"
19376"/// \\param a\n"
19377"/// A 128-bit vector containing 32-bit float values.\n"
19378"/// \\param imm\n"
19379"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19380"/// 000: Nearest \\n\n"
19381"/// 001: Down \\n\n"
19382"/// 010: Up \\n\n"
19383"/// 011: Truncate \\n\n"
19384"/// 1XX: Use MXCSR.RC for rounding\n"
19385"/// \\returns A 128-bit vector containing converted 16-bit half-precision float\n"
19386"/// values. The lower 64 bits are used to store the converted 16-bit\n"
19387"/// half-precision floating-point values.\n"
19388"#define _mm_cvtps_ph(a, imm) \\\n"
19389" (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))\n"
19390"\n"
19391"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19392"/// values into a 128-bit vector containing 32-bit float values.\n"
19393"///\n"
19394"/// \\headerfile <x86intrin.h>\n"
19395"///\n"
19396"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19397"///\n"
19398"/// \\param __a\n"
19399"/// A 128-bit vector containing 16-bit half-precision float values. The lower\n"
19400"/// 64 bits are used in the conversion.\n"
19401"/// \\returns A 128-bit vector of [4 x float] containing converted float values.\n"
19402"static __inline __m128 __DEFAULT_FN_ATTRS128\n"
19403"_mm_cvtph_ps(__m128i __a)\n"
19404"{\n"
19405" return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);\n"
19406"}\n"
19407"\n"
19408"/// Converts a 256-bit vector of [8 x float] into a 128-bit vector\n"
19409"/// containing 16-bit half-precision float values.\n"
19410"///\n"
19411"/// \\headerfile <x86intrin.h>\n"
19412"///\n"
19413"/// \\code\n"
19414"/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);\n"
19415"/// \\endcode\n"
19416"///\n"
19417"/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n"
19418"///\n"
19419"/// \\param a\n"
19420"/// A 256-bit vector containing 32-bit single-precision float values to be\n"
19421"/// converted to 16-bit half-precision float values.\n"
19422"/// \\param imm\n"
19423"/// An immediate value controlling rounding using bits [2:0]: \\n\n"
19424"/// 000: Nearest \\n\n"
19425"/// 001: Down \\n\n"
19426"/// 010: Up \\n\n"
19427"/// 011: Truncate \\n\n"
19428"/// 1XX: Use MXCSR.RC for rounding\n"
19429"/// \\returns A 128-bit vector containing the converted 16-bit half-precision\n"
19430"/// float values.\n"
19431"#define _mm256_cvtps_ph(a, imm) \\\n"
19432" (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))\n"
19433"\n"
19434"/// Converts a 128-bit vector containing 16-bit half-precision float\n"
19435"/// values into a 256-bit vector of [8 x float].\n"
19436"///\n"
19437"/// \\headerfile <x86intrin.h>\n"
19438"///\n"
19439"/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n"
19440"///\n"
19441"/// \\param __a\n"
19442"/// A 128-bit vector containing 16-bit half-precision float values to be\n"
19443"/// converted to 32-bit single-precision float values.\n"
19444"/// \\returns A vector of [8 x float] containing the converted 32-bit\n"
19445"/// single-precision float values.\n"
19446"static __inline __m256 __DEFAULT_FN_ATTRS256\n"
19447"_mm256_cvtph_ps(__m128i __a)\n"
19448"{\n"
19449" return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);\n"
19450"}\n"
19451"\n"
19452"#undef __DEFAULT_FN_ATTRS128\n"
19453"#undef __DEFAULT_FN_ATTRS256\n"
19454"\n"
19455"#endif /* __F16CINTRIN_H */\n"
19456"" } ,
19457 { "/builtins/float.h" , "/*===---- float.h - Characteristics of floating point types ----------------===\n"
19458" *\n"
19459" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19460" * of this software and associated documentation files (the \"Software\"), to deal\n"
19461" * in the Software without restriction, including without limitation the rights\n"
19462" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19463" * copies of the Software, and to permit persons to whom the Software is\n"
19464" * furnished to do so, subject to the following conditions:\n"
19465" *\n"
19466" * The above copyright notice and this permission notice shall be included in\n"
19467" * all copies or substantial portions of the Software.\n"
19468" *\n"
19469" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19470" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19471" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19472" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19473" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19474" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19475" * THE SOFTWARE.\n"
19476" *\n"
19477" *===-----------------------------------------------------------------------===\n"
19478" */\n"
19479"\n"
19480"#ifndef __FLOAT_H\n"
19481"#define __FLOAT_H\n"
19482"\n"
19483"/* If we're on MinGW, fall back to the system's float.h, which might have\n"
19484" * additional definitions provided for Windows.\n"
19485" * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx\n"
19486" *\n"
19487" * Also fall back on Darwin to allow additional definitions and\n"
19488" * implementation-defined values.\n"
19489" */\n"
19490"#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \\\n"
19491" __STDC_HOSTED__ && __has_include_next(<float.h>)\n"
19492"\n"
19493"/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level\n"
19494" * of #include_next<float.h> to keep Metrowerks compilers happy. Avoid this\n"
19495" * extra indirection.\n"
19496" */\n"
19497"#ifdef __APPLE__\n"
19498"#define _FLOAT_H_\n"
19499"#endif\n"
19500"\n"
19501"# include_next <float.h>\n"
19502"\n"
19503"/* Undefine anything that we'll be redefining below. */\n"
19504"# undef FLT_EVAL_METHOD\n"
19505"# undef FLT_ROUNDS\n"
19506"# undef FLT_RADIX\n"
19507"# undef FLT_MANT_DIG\n"
19508"# undef DBL_MANT_DIG\n"
19509"# undef LDBL_MANT_DIG\n"
19510"# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19511"# undef DECIMAL_DIG\n"
19512"# endif\n"
19513"# undef FLT_DIG\n"
19514"# undef DBL_DIG\n"
19515"# undef LDBL_DIG\n"
19516"# undef FLT_MIN_EXP\n"
19517"# undef DBL_MIN_EXP\n"
19518"# undef LDBL_MIN_EXP\n"
19519"# undef FLT_MIN_10_EXP\n"
19520"# undef DBL_MIN_10_EXP\n"
19521"# undef LDBL_MIN_10_EXP\n"
19522"# undef FLT_MAX_EXP\n"
19523"# undef DBL_MAX_EXP\n"
19524"# undef LDBL_MAX_EXP\n"
19525"# undef FLT_MAX_10_EXP\n"
19526"# undef DBL_MAX_10_EXP\n"
19527"# undef LDBL_MAX_10_EXP\n"
19528"# undef FLT_MAX\n"
19529"# undef DBL_MAX\n"
19530"# undef LDBL_MAX\n"
19531"# undef FLT_EPSILON\n"
19532"# undef DBL_EPSILON\n"
19533"# undef LDBL_EPSILON\n"
19534"# undef FLT_MIN\n"
19535"# undef DBL_MIN\n"
19536"# undef LDBL_MIN\n"
19537"# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19538"# undef FLT_TRUE_MIN\n"
19539"# undef DBL_TRUE_MIN\n"
19540"# undef LDBL_TRUE_MIN\n"
19541"# undef FLT_DECIMAL_DIG\n"
19542"# undef DBL_DECIMAL_DIG\n"
19543"# undef LDBL_DECIMAL_DIG\n"
19544"# endif\n"
19545"#endif\n"
19546"\n"
19547"/* Characteristics of floating point types, C99 5.2.4.2.2 */\n"
19548"\n"
19549"#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__\n"
19550"#define FLT_ROUNDS (__builtin_flt_rounds())\n"
19551"#define FLT_RADIX __FLT_RADIX__\n"
19552"\n"
19553"#define FLT_MANT_DIG __FLT_MANT_DIG__\n"
19554"#define DBL_MANT_DIG __DBL_MANT_DIG__\n"
19555"#define LDBL_MANT_DIG __LDBL_MANT_DIG__\n"
19556"\n"
19557"#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n"
19558"# define DECIMAL_DIG __DECIMAL_DIG__\n"
19559"#endif\n"
19560"\n"
19561"#define FLT_DIG __FLT_DIG__\n"
19562"#define DBL_DIG __DBL_DIG__\n"
19563"#define LDBL_DIG __LDBL_DIG__\n"
19564"\n"
19565"#define FLT_MIN_EXP __FLT_MIN_EXP__\n"
19566"#define DBL_MIN_EXP __DBL_MIN_EXP__\n"
19567"#define LDBL_MIN_EXP __LDBL_MIN_EXP__\n"
19568"\n"
19569"#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__\n"
19570"#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__\n"
19571"#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__\n"
19572"\n"
19573"#define FLT_MAX_EXP __FLT_MAX_EXP__\n"
19574"#define DBL_MAX_EXP __DBL_MAX_EXP__\n"
19575"#define LDBL_MAX_EXP __LDBL_MAX_EXP__\n"
19576"\n"
19577"#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__\n"
19578"#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__\n"
19579"#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__\n"
19580"\n"
19581"#define FLT_MAX __FLT_MAX__\n"
19582"#define DBL_MAX __DBL_MAX__\n"
19583"#define LDBL_MAX __LDBL_MAX__\n"
19584"\n"
19585"#define FLT_EPSILON __FLT_EPSILON__\n"
19586"#define DBL_EPSILON __DBL_EPSILON__\n"
19587"#define LDBL_EPSILON __LDBL_EPSILON__\n"
19588"\n"
19589"#define FLT_MIN __FLT_MIN__\n"
19590"#define DBL_MIN __DBL_MIN__\n"
19591"#define LDBL_MIN __LDBL_MIN__\n"
19592"\n"
19593"#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n"
19594"# define FLT_TRUE_MIN __FLT_DENORM_MIN__\n"
19595"# define DBL_TRUE_MIN __DBL_DENORM_MIN__\n"
19596"# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__\n"
19597"# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__\n"
19598"# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__\n"
19599"# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__\n"
19600"#endif\n"
19601"\n"
19602"#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__\n"
19603"# define FLT16_MANT_DIG __FLT16_MANT_DIG__\n"
19604"# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__\n"
19605"# define FLT16_DIG __FLT16_DIG__\n"
19606"# define FLT16_MIN_EXP __FLT16_MIN_EXP__\n"
19607"# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__\n"
19608"# define FLT16_MAX_EXP __FLT16_MAX_EXP__\n"
19609"# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__\n"
19610"# define FLT16_MAX __FLT16_MAX__\n"
19611"# define FLT16_EPSILON __FLT16_EPSILON__\n"
19612"# define FLT16_MIN __FLT16_MIN__\n"
19613"# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__\n"
19614"#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */\n"
19615"\n"
19616"#endif /* __FLOAT_H */\n"
19617"" } ,
19618 { "/builtins/fma4intrin.h" , "/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===\n"
19619" *\n"
19620" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19621" * of this software and associated documentation files (the \"Software\"), to deal\n"
19622" * in the Software without restriction, including without limitation the rights\n"
19623" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19624" * copies of the Software, and to permit persons to whom the Software is\n"
19625" * furnished to do so, subject to the following conditions:\n"
19626" *\n"
19627" * The above copyright notice and this permission notice shall be included in\n"
19628" * all copies or substantial portions of the Software.\n"
19629" *\n"
19630" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19631" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19632" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19633" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19634" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19635" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19636" * THE SOFTWARE.\n"
19637" *\n"
19638" *===-----------------------------------------------------------------------===\n"
19639" */\n"
19640"\n"
19641"#ifndef __X86INTRIN_H\n"
19642"#error \"Never use <fma4intrin.h> directly; include <x86intrin.h> instead.\"\n"
19643"#endif\n"
19644"\n"
19645"#ifndef __FMA4INTRIN_H\n"
19646"#define __FMA4INTRIN_H\n"
19647"\n"
19648"#include <pmmintrin.h>\n"
19649"\n"
19650"/* Define the default attributes for the functions in this file. */\n"
19651"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(128)))\n"
19652"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(256)))\n"
19653"\n"
19654"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19655"_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19656"{\n"
19657" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19658"}\n"
19659"\n"
19660"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19661"_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19662"{\n"
19663" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19664"}\n"
19665"\n"
19666"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19667"_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19668"{\n"
19669" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19670"}\n"
19671"\n"
19672"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19673"_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19674"{\n"
19675" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19676"}\n"
19677"\n"
19678"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19679"_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19680"{\n"
19681" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19682"}\n"
19683"\n"
19684"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19685"_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19686"{\n"
19687" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19688"}\n"
19689"\n"
19690"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19691"_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19692"{\n"
19693" return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19694"}\n"
19695"\n"
19696"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19697"_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19698"{\n"
19699" return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19700"}\n"
19701"\n"
19702"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19703"_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19704"{\n"
19705" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19706"}\n"
19707"\n"
19708"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19709"_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19710"{\n"
19711" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19712"}\n"
19713"\n"
19714"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19715"_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19716"{\n"
19717" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19718"}\n"
19719"\n"
19720"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19721"_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19722"{\n"
19723" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19724"}\n"
19725"\n"
19726"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19727"_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19728"{\n"
19729" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19730"}\n"
19731"\n"
19732"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19733"_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19734"{\n"
19735" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19736"}\n"
19737"\n"
19738"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19739"_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19740"{\n"
19741" return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19742"}\n"
19743"\n"
19744"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19745"_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19746"{\n"
19747" return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19748"}\n"
19749"\n"
19750"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19751"_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19752"{\n"
19753" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19754"}\n"
19755"\n"
19756"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19757"_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19758"{\n"
19759" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19760"}\n"
19761"\n"
19762"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19763"_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19764"{\n"
19765" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19766"}\n"
19767"\n"
19768"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19769"_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19770"{\n"
19771" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19772"}\n"
19773"\n"
19774"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19775"_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19776"{\n"
19777" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19778"}\n"
19779"\n"
19780"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19781"_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19782"{\n"
19783" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19784"}\n"
19785"\n"
19786"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19787"_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19788"{\n"
19789" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19790"}\n"
19791"\n"
19792"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19793"_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19794"{\n"
19795" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19796"}\n"
19797"\n"
19798"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19799"_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19800"{\n"
19801" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19802"}\n"
19803"\n"
19804"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19805"_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19806"{\n"
19807" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19808"}\n"
19809"\n"
19810"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19811"_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19812"{\n"
19813" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19814"}\n"
19815"\n"
19816"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19817"_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19818"{\n"
19819" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19820"}\n"
19821"\n"
19822"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19823"_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19824"{\n"
19825" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
19826"}\n"
19827"\n"
19828"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19829"_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19830"{\n"
19831" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
19832"}\n"
19833"\n"
19834"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
19835"_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
19836"{\n"
19837" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
19838"}\n"
19839"\n"
19840"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
19841"_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
19842"{\n"
19843" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
19844"}\n"
19845"\n"
19846"#undef __DEFAULT_FN_ATTRS128\n"
19847"#undef __DEFAULT_FN_ATTRS256\n"
19848"\n"
19849"#endif /* __FMA4INTRIN_H */\n"
19850"" } ,
19851 { "/builtins/fmaintrin.h" , "/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===\n"
19852" *\n"
19853" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
19854" * of this software and associated documentation files (the \"Software\"), to deal\n"
19855" * in the Software without restriction, including without limitation the rights\n"
19856" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
19857" * copies of the Software, and to permit persons to whom the Software is\n"
19858" * furnished to do so, subject to the following conditions:\n"
19859" *\n"
19860" * The above copyright notice and this permission notice shall be included in\n"
19861" * all copies or substantial portions of the Software.\n"
19862" *\n"
19863" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
19864" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
19865" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
19866" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
19867" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
19868" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
19869" * THE SOFTWARE.\n"
19870" *\n"
19871" *===-----------------------------------------------------------------------===\n"
19872" */\n"
19873"\n"
19874"#ifndef __IMMINTRIN_H\n"
19875"#error \"Never use <fmaintrin.h> directly; include <immintrin.h> instead.\"\n"
19876"#endif\n"
19877"\n"
19878"#ifndef __FMAINTRIN_H\n"
19879"#define __FMAINTRIN_H\n"
19880"\n"
19881"/* Define the default attributes for the functions in this file. */\n"
19882"#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(128)))\n"
19883"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(256)))\n"
19884"\n"
19885"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19886"_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19887"{\n"
19888" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19889"}\n"
19890"\n"
19891"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19892"_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19893"{\n"
19894" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19895"}\n"
19896"\n"
19897"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19898"_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19899"{\n"
19900" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19901"}\n"
19902"\n"
19903"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19904"_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19905"{\n"
19906" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19907"}\n"
19908"\n"
19909"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19910"_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19911"{\n"
19912" return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19913"}\n"
19914"\n"
19915"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19916"_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19917"{\n"
19918" return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19919"}\n"
19920"\n"
19921"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19922"_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19923"{\n"
19924" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19925"}\n"
19926"\n"
19927"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19928"_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19929"{\n"
19930" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19931"}\n"
19932"\n"
19933"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19934"_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19935"{\n"
19936" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19937"}\n"
19938"\n"
19939"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19940"_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19941"{\n"
19942" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19943"}\n"
19944"\n"
19945"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19946"_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19947"{\n"
19948" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);\n"
19949"}\n"
19950"\n"
19951"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19952"_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19953"{\n"
19954" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);\n"
19955"}\n"
19956"\n"
19957"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19958"_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19959"{\n"
19960" return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19961"}\n"
19962"\n"
19963"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19964"_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19965"{\n"
19966" return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
19967"}\n"
19968"\n"
19969"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19970"_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n"
19971"{\n"
19972" return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);\n"
19973"}\n"
19974"\n"
19975"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19976"_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n"
19977"{\n"
19978" return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);\n"
19979"}\n"
19980"\n"
19981"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19982"_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19983"{\n"
19984" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n"
19985"}\n"
19986"\n"
19987"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
19988"_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n"
19989"{\n"
19990" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n"
19991"}\n"
19992"\n"
19993"static __inline__ __m128 __DEFAULT_FN_ATTRS128\n"
19994"_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n"
19995"{\n"
19996" return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n"
19997"}\n"
19998"\n"
19999"static __inline__ __m128d __DEFAULT_FN_ATTRS128\n"
20000"_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n"
20001"{\n"
20002" return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n"
20003"}\n"
20004"\n"
20005"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20006"_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20007"{\n"
20008" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20009"}\n"
20010"\n"
20011"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20012"_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20013"{\n"
20014" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20015"}\n"
20016"\n"
20017"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20018"_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20019"{\n"
20020" return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20021"}\n"
20022"\n"
20023"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20024"_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20025"{\n"
20026" return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20027"}\n"
20028"\n"
20029"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20030"_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20031"{\n"
20032" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20033"}\n"
20034"\n"
20035"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20036"_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20037"{\n"
20038" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20039"}\n"
20040"\n"
20041"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20042"_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20043"{\n"
20044" return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20045"}\n"
20046"\n"
20047"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20048"_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20049"{\n"
20050" return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20051"}\n"
20052"\n"
20053"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20054"_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20055"{\n"
20056" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n"
20057"}\n"
20058"\n"
20059"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20060"_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20061"{\n"
20062" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n"
20063"}\n"
20064"\n"
20065"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
20066"_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n"
20067"{\n"
20068" return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n"
20069"}\n"
20070"\n"
20071"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
20072"_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n"
20073"{\n"
20074" return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n"
20075"}\n"
20076"\n"
20077"#undef __DEFAULT_FN_ATTRS128\n"
20078"#undef __DEFAULT_FN_ATTRS256\n"
20079"\n"
20080"#endif /* __FMAINTRIN_H */\n"
20081"" } ,
20082 { "/builtins/fxsrintrin.h" , "/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===\n"
20083" *\n"
20084" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20085" * of this software and associated documentation files (the \"Software\"), to deal\n"
20086" * in the Software without restriction, including without limitation the rights\n"
20087" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20088" * copies of the Software, and to permit persons to whom the Software is\n"
20089" * furnished to do so, subject to the following conditions:\n"
20090" *\n"
20091" * The above copyright notice and this permission notice shall be included in\n"
20092" * all copies or substantial portions of the Software.\n"
20093" *\n"
20094" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20095" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20096" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20097" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20098" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20099" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20100" * THE SOFTWARE.\n"
20101" *\n"
20102" *===-----------------------------------------------------------------------===\n"
20103" */\n"
20104"\n"
20105"#ifndef __IMMINTRIN_H\n"
20106"#error \"Never use <fxsrintrin.h> directly; include <immintrin.h> instead.\"\n"
20107"#endif\n"
20108"\n"
20109"#ifndef __FXSRINTRIN_H\n"
20110"#define __FXSRINTRIN_H\n"
20111"\n"
20112"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"fxsr\")))\n"
20113"\n"
20114"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20115"/// memory region pointed to by the input parameter \\a __p.\n"
20116"///\n"
20117"/// \\headerfile <x86intrin.h>\n"
20118"///\n"
20119"/// This intrinsic corresponds to the <c> FXSAVE </c> instruction.\n"
20120"///\n"
20121"/// \\param __p\n"
20122"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20123"/// region should be aligned on a 16-byte boundary.\n"
20124"static __inline__ void __DEFAULT_FN_ATTRS\n"
20125"_fxsave(void *__p)\n"
20126"{\n"
20127" __builtin_ia32_fxsave(__p);\n"
20128"}\n"
20129"\n"
20130"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20131"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20132"/// this memory region should have been written to by a previous \\c _fxsave\n"
20133"/// or \\c _fxsave64 intrinsic.\n"
20134"///\n"
20135"/// \\headerfile <x86intrin.h>\n"
20136"///\n"
20137"/// This intrinsic corresponds to the <c> FXRSTOR </c> instruction.\n"
20138"///\n"
20139"/// \\param __p\n"
20140"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20141"/// region should be aligned on a 16-byte boundary.\n"
20142"static __inline__ void __DEFAULT_FN_ATTRS\n"
20143"_fxrstor(void *__p)\n"
20144"{\n"
20145" __builtin_ia32_fxrstor(__p);\n"
20146"}\n"
20147"\n"
20148"#ifdef __x86_64__\n"
20149"/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n"
20150"/// memory region pointed to by the input parameter \\a __p.\n"
20151"///\n"
20152"/// \\headerfile <x86intrin.h>\n"
20153"///\n"
20154"/// This intrinsic corresponds to the <c> FXSAVE64 </c> instruction.\n"
20155"///\n"
20156"/// \\param __p\n"
20157"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20158"/// region should be aligned on a 16-byte boundary.\n"
20159"static __inline__ void __DEFAULT_FN_ATTRS\n"
20160"_fxsave64(void *__p)\n"
20161"{\n"
20162" __builtin_ia32_fxsave64(__p);\n"
20163"}\n"
20164"\n"
20165"/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n"
20166"/// memory region pointed to by the input parameter \\a __p. The contents of\n"
20167"/// this memory region should have been written to by a previous \\c _fxsave\n"
20168"/// or \\c _fxsave64 intrinsic.\n"
20169"///\n"
20170"/// \\headerfile <x86intrin.h>\n"
20171"///\n"
20172"/// This intrinsic corresponds to the <c> FXRSTOR64 </c> instruction.\n"
20173"///\n"
20174"/// \\param __p\n"
20175"/// A pointer to a 512-byte memory region. The beginning of this memory\n"
20176"/// region should be aligned on a 16-byte boundary.\n"
20177"static __inline__ void __DEFAULT_FN_ATTRS\n"
20178"_fxrstor64(void *__p)\n"
20179"{\n"
20180" __builtin_ia32_fxrstor64(__p);\n"
20181"}\n"
20182"#endif\n"
20183"\n"
20184"#undef __DEFAULT_FN_ATTRS\n"
20185"\n"
20186"#endif\n"
20187"" } ,
20188 { "/builtins/gfniintrin.h" , "/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===\n"
20189" *\n"
20190" *\n"
20191" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20192" * of this software and associated documentation files (the \"Software\"), to deal\n"
20193" * in the Software without restriction, including without limitation the rights\n"
20194" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20195" * copies of the Software, and to permit persons to whom the Software is\n"
20196" * furnished to do so, subject to the following conditions:\n"
20197" *\n"
20198" * The above copyright notice and this permission notice shall be included in\n"
20199" * all copies or substantial portions of the Software.\n"
20200" *\n"
20201" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20202" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20203" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20204" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20205" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20206" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20207" * THE SOFTWARE.\n"
20208" *\n"
20209" *===-----------------------------------------------------------------------===\n"
20210" */\n"
20211"#ifndef __IMMINTRIN_H\n"
20212"#error \"Never use <gfniintrin.h> directly; include <immintrin.h> instead.\"\n"
20213"#endif\n"
20214"\n"
20215"#ifndef __GFNIINTRIN_H\n"
20216"#define __GFNIINTRIN_H\n"
20217"\n"
20218"\n"
20219"#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20220" (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20221" (__v16qi)(__m128i)(B), \\\n"
20222" (char)(I))\n"
20223"\n"
20224"#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20225" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20226" (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20227" (__v16qi)(__m128i)(S))\n"
20228"\n"
20229"\n"
20230"#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20231" (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20232" U, A, B, I)\n"
20233"\n"
20234"\n"
20235"#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20236" (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20237" (__v32qi)(__m256i)(B), \\\n"
20238" (char)(I))\n"
20239"\n"
20240"#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20241" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20242" (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20243" (__v32qi)(__m256i)(S))\n"
20244"\n"
20245"#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20246" (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20247" U, A, B, I)\n"
20248"\n"
20249"\n"
20250"#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \\\n"
20251" (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20252" (__v64qi)(__m512i)(B), \\\n"
20253" (char)(I))\n"
20254"\n"
20255"#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n"
20256" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20257" (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \\\n"
20258" (__v64qi)(__m512i)(S))\n"
20259"\n"
20260"#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n"
20261" (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20262" U, A, B, I)\n"
20263"\n"
20264"#define _mm_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20265" (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \\\n"
20266" (__v16qi)(__m128i)(B), \\\n"
20267" (char)(I))\n"
20268"\n"
20269"#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20270" (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n"
20271" (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20272" (__v16qi)(__m128i)(S))\n"
20273"\n"
20274"\n"
20275"#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20276" (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n"
20277" U, A, B, I)\n"
20278"\n"
20279"\n"
20280"#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20281" (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \\\n"
20282" (__v32qi)(__m256i)(B), \\\n"
20283" (char)(I))\n"
20284"\n"
20285"#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20286" (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n"
20287" (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20288" (__v32qi)(__m256i)(S))\n"
20289"\n"
20290"#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20291" (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n"
20292" U, A, B, I)\n"
20293"\n"
20294"\n"
20295"#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \\\n"
20296" (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \\\n"
20297" (__v64qi)(__m512i)(B), \\\n"
20298" (char)(I))\n"
20299"\n"
20300"#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n"
20301" (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n"
20302" (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \\\n"
20303" (__v64qi)(__m512i)(S))\n"
20304"\n"
20305"#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n"
20306" (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n"
20307" U, A, B, I)\n"
20308"\n"
20309"/* Default attributes for simple form (no masking). */\n"
20310"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"gfni\"), __min_vector_width__(128)))\n"
20311"\n"
20312"/* Default attributes for YMM unmasked form. */\n"
20313"#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__(\"avx,gfni\"), __min_vector_width__(256)))\n"
20314"\n"
20315"/* Default attributes for ZMM forms. */\n"
20316"#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,gfni\"), __min_vector_width__(512)))\n"
20317"\n"
20318"/* Default attributes for VLX forms. */\n"
20319"#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(128)))\n"
20320"#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(256)))\n"
20321"\n"
20322"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
20323"_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)\n"
20324"{\n"
20325" return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,\n"
20326" (__v16qi) __B);\n"
20327"}\n"
20328"\n"
20329"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20330"_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)\n"
20331"{\n"
20332" return (__m128i) __builtin_ia32_selectb_128(__U,\n"
20333" (__v16qi) _mm_gf2p8mul_epi8(__A, __B),\n"
20334" (__v16qi) __S);\n"
20335"}\n"
20336"\n"
20337"static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n"
20338"_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)\n"
20339"{\n"
20340" return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),\n"
20341" __U, __A, __B);\n"
20342"}\n"
20343"\n"
20344"static __inline__ __m256i __DEFAULT_FN_ATTRS_Y\n"
20345"_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)\n"
20346"{\n"
20347" return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,\n"
20348" (__v32qi) __B);\n"
20349"}\n"
20350"\n"
20351"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20352"_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)\n"
20353"{\n"
20354" return (__m256i) __builtin_ia32_selectb_256(__U,\n"
20355" (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),\n"
20356" (__v32qi) __S);\n"
20357"}\n"
20358"\n"
20359"static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n"
20360"_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)\n"
20361"{\n"
20362" return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),\n"
20363" __U, __A, __B);\n"
20364"}\n"
20365"\n"
20366"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20367"_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)\n"
20368"{\n"
20369" return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,\n"
20370" (__v64qi) __B);\n"
20371"}\n"
20372"\n"
20373"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20374"_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)\n"
20375"{\n"
20376" return (__m512i) __builtin_ia32_selectb_512(__U,\n"
20377" (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),\n"
20378" (__v64qi) __S);\n"
20379"}\n"
20380"\n"
20381"static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n"
20382"_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)\n"
20383"{\n"
20384" return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),\n"
20385" __U, __A, __B);\n"
20386"}\n"
20387"\n"
20388"#undef __DEFAULT_FN_ATTRS\n"
20389"#undef __DEFAULT_FN_ATTRS_Y\n"
20390"#undef __DEFAULT_FN_ATTRS_Z\n"
20391"#undef __DEFAULT_FN_ATTRS_VL128\n"
20392"#undef __DEFAULT_FN_ATTRS_VL256\n"
20393"\n"
20394"#endif /* __GFNIINTRIN_H */\n"
20395"\n"
20396"" } ,
20397 { "/builtins/htmintrin.h" , "/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\\\n"
20398" *\n"
20399" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20400" * of this software and associated documentation files (the \"Software\"), to deal\n"
20401" * in the Software without restriction, including without limitation the rights\n"
20402" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20403" * copies of the Software, and to permit persons to whom the Software is\n"
20404" * furnished to do so, subject to the following conditions:\n"
20405" *\n"
20406" * The above copyright notice and this permission notice shall be included in\n"
20407" * all copies or substantial portions of the Software.\n"
20408" *\n"
20409" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20410" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20411" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20412" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20413" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20414" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20415" * THE SOFTWARE.\n"
20416" *\n"
20417"\\*===----------------------------------------------------------------------===*/\n"
20418"\n"
20419"#ifndef __HTMINTRIN_H\n"
20420"#define __HTMINTRIN_H\n"
20421"\n"
20422"#ifndef __HTM__\n"
20423"#error \"HTM instruction set not enabled\"\n"
20424"#endif\n"
20425"\n"
20426"#ifdef __powerpc__\n"
20427"\n"
20428"#include <stdint.h>\n"
20429"\n"
20430"typedef uint64_t texasr_t;\n"
20431"typedef uint32_t texasru_t;\n"
20432"typedef uint32_t texasrl_t;\n"
20433"typedef uintptr_t tfiar_t;\n"
20434"typedef uintptr_t tfhar_t;\n"
20435"\n"
20436"#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)\n"
20437"#define _HTM_NONTRANSACTIONAL 0x0\n"
20438"#define _HTM_SUSPENDED 0x1\n"
20439"#define _HTM_TRANSACTIONAL 0x2\n"
20440"\n"
20441"#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20442" (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))\n"
20443"#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n"
20444" (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))\n"
20445"\n"
20446"#define _TEXASR_FAILURE_CODE(TEXASR) \\\n"
20447" _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)\n"
20448"#define _TEXASRU_FAILURE_CODE(TEXASRU) \\\n"
20449" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)\n"
20450"\n"
20451"#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \\\n"
20452" _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)\n"
20453"#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \\\n"
20454" _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)\n"
20455"\n"
20456"#define _TEXASR_DISALLOWED(TEXASR) \\\n"
20457" _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)\n"
20458"#define _TEXASRU_DISALLOWED(TEXASRU) \\\n"
20459" _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)\n"
20460"\n"
20461"#define _TEXASR_NESTING_OVERFLOW(TEXASR) \\\n"
20462" _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)\n"
20463"#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \\\n"
20464" _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)\n"
20465"\n"
20466"#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \\\n"
20467" _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)\n"
20468"#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \\\n"
20469" _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)\n"
20470"\n"
20471"#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \\\n"
20472" _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)\n"
20473"#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \\\n"
20474" _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)\n"
20475"\n"
20476"#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \\\n"
20477" _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)\n"
20478"#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \\\n"
20479" _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)\n"
20480"\n"
20481"#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \\\n"
20482" _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)\n"
20483"#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \\\n"
20484" _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)\n"
20485"\n"
20486"#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \\\n"
20487" _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)\n"
20488"#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \\\n"
20489" _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)\n"
20490"\n"
20491"#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \\\n"
20492" _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)\n"
20493"#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \\\n"
20494" _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)\n"
20495"\n"
20496"#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \\\n"
20497" _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)\n"
20498"#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \\\n"
20499" _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)\n"
20500"\n"
20501"#define _TEXASR_ABORT(TEXASR) \\\n"
20502" _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)\n"
20503"#define _TEXASRU_ABORT(TEXASRU) \\\n"
20504" _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)\n"
20505"\n"
20506"\n"
20507"#define _TEXASR_SUSPENDED(TEXASR) \\\n"
20508" _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)\n"
20509"\n"
20510"#define _TEXASR_PRIVILEGE(TEXASR) \\\n"
20511" _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)\n"
20512"\n"
20513"#define _TEXASR_FAILURE_SUMMARY(TEXASR) \\\n"
20514" _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)\n"
20515"\n"
20516"#define _TEXASR_TFIAR_EXACT(TEXASR) \\\n"
20517" _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)\n"
20518"\n"
20519"#define _TEXASR_ROT(TEXASR) \\\n"
20520" _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)\n"
20521"\n"
20522"#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \\\n"
20523" _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)\n"
20524"\n"
20525"#endif /* __powerpc */\n"
20526"\n"
20527"#ifdef __s390__\n"
20528"\n"
20529"/* Condition codes generated by tbegin */\n"
20530"#define _HTM_TBEGIN_STARTED 0\n"
20531"#define _HTM_TBEGIN_INDETERMINATE 1\n"
20532"#define _HTM_TBEGIN_TRANSIENT 2\n"
20533"#define _HTM_TBEGIN_PERSISTENT 3\n"
20534"\n"
20535"/* The abort codes below this threshold are reserved for machine use. */\n"
20536"#define _HTM_FIRST_USER_ABORT_CODE 256\n"
20537"\n"
20538"/* The transaction diagnostic block is it is defined in the Principles\n"
20539" of Operation chapter 5-91. */\n"
20540"\n"
20541"struct __htm_tdb {\n"
20542" unsigned char format; /* 0 */\n"
20543" unsigned char flags;\n"
20544" unsigned char reserved1[4];\n"
20545" unsigned short nesting_depth;\n"
20546" unsigned long long abort_code; /* 8 */\n"
20547" unsigned long long conflict_token; /* 16 */\n"
20548" unsigned long long atia; /* 24 */\n"
20549" unsigned char eaid; /* 32 */\n"
20550" unsigned char dxc;\n"
20551" unsigned char reserved2[2];\n"
20552" unsigned int program_int_id;\n"
20553" unsigned long long exception_id; /* 40 */\n"
20554" unsigned long long bea; /* 48 */\n"
20555" unsigned char reserved3[72]; /* 56 */\n"
20556" unsigned long long gprs[16]; /* 128 */\n"
20557"} __attribute__((__packed__, __aligned__ (8)));\n"
20558"\n"
20559"\n"
20560"/* Helper intrinsics to retry tbegin in case of transient failure. */\n"
20561"\n"
20562"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20563"__builtin_tbegin_retry_null (int __retry)\n"
20564"{\n"
20565" int cc, i = 0;\n"
20566"\n"
20567" while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT\n"
20568" && i++ < __retry)\n"
20569" __builtin_tx_assist(i);\n"
20570"\n"
20571" return cc;\n"
20572"}\n"
20573"\n"
20574"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20575"__builtin_tbegin_retry_tdb (void *__tdb, int __retry)\n"
20576"{\n"
20577" int cc, i = 0;\n"
20578"\n"
20579" while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20580" && i++ < __retry)\n"
20581" __builtin_tx_assist(i);\n"
20582"\n"
20583" return cc;\n"
20584"}\n"
20585"\n"
20586"#define __builtin_tbegin_retry(tdb, retry) \\\n"
20587" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20588" __builtin_tbegin_retry_null(retry) : \\\n"
20589" __builtin_tbegin_retry_tdb(tdb, retry))\n"
20590"\n"
20591"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20592"__builtin_tbegin_retry_nofloat_null (int __retry)\n"
20593"{\n"
20594" int cc, i = 0;\n"
20595"\n"
20596" while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT\n"
20597" && i++ < __retry)\n"
20598" __builtin_tx_assist(i);\n"
20599"\n"
20600" return cc;\n"
20601"}\n"
20602"\n"
20603"static __inline int __attribute__((__always_inline__, __nodebug__))\n"
20604"__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)\n"
20605"{\n"
20606" int cc, i = 0;\n"
20607"\n"
20608" while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT\n"
20609" && i++ < __retry)\n"
20610" __builtin_tx_assist(i);\n"
20611"\n"
20612" return cc;\n"
20613"}\n"
20614"\n"
20615"#define __builtin_tbegin_retry_nofloat(tdb, retry) \\\n"
20616" (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n"
20617" __builtin_tbegin_retry_nofloat_null(retry) : \\\n"
20618" __builtin_tbegin_retry_nofloat_tdb(tdb, retry))\n"
20619"\n"
20620"#endif /* __s390__ */\n"
20621"\n"
20622"#endif /* __HTMINTRIN_H */\n"
20623"" } ,
20624 { "/builtins/htmxlintrin.h" , "/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\\\n"
20625" *\n"
20626" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20627" * of this software and associated documentation files (the \"Software\"), to deal\n"
20628" * in the Software without restriction, including without limitation the rights\n"
20629" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20630" * copies of the Software, and to permit persons to whom the Software is\n"
20631" * furnished to do so, subject to the following conditions:\n"
20632" *\n"
20633" * The above copyright notice and this permission notice shall be included in\n"
20634" * all copies or substantial portions of the Software.\n"
20635" *\n"
20636" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20637" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20638" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20639" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
20640" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
20641" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
20642" * THE SOFTWARE.\n"
20643" *\n"
20644"\\*===----------------------------------------------------------------------===*/\n"
20645"\n"
20646"#ifndef __HTMXLINTRIN_H\n"
20647"#define __HTMXLINTRIN_H\n"
20648"\n"
20649"#ifndef __HTM__\n"
20650"#error \"HTM instruction set not enabled\"\n"
20651"#endif\n"
20652"\n"
20653"#include <htmintrin.h>\n"
20654"\n"
20655"#ifdef __powerpc__\n"
20656"\n"
20657"#ifdef __cplusplus\n"
20658"extern \"C\" {\n"
20659"#endif\n"
20660"\n"
20661"#define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0))\n"
20662"#define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0))\n"
20663"#define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4))\n"
20664"#define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8))\n"
20665"\n"
20666"typedef char TM_buff_type[16];\n"
20667"\n"
20668"/* This macro can be used to determine whether a transaction was successfully\n"
20669" started from the __TM_begin() and __TM_simple_begin() intrinsic functions\n"
20670" below. */\n"
20671"#define _HTM_TBEGIN_STARTED 1\n"
20672"\n"
20673"extern __inline long\n"
20674"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20675"__TM_simple_begin (void)\n"
20676"{\n"
20677" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20678" return _HTM_TBEGIN_STARTED;\n"
20679" return 0;\n"
20680"}\n"
20681"\n"
20682"extern __inline long\n"
20683"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20684"__TM_begin (void* const __TM_buff)\n"
20685"{\n"
20686" *_TEXASRL_PTR (__TM_buff) = 0;\n"
20687" if (__builtin_expect (__builtin_tbegin (0), 1))\n"
20688" return _HTM_TBEGIN_STARTED;\n"
20689"#ifdef __powerpc64__\n"
20690" *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20691"#else\n"
20692" *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();\n"
20693" *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();\n"
20694"#endif\n"
20695" *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();\n"
20696" return 0;\n"
20697"}\n"
20698"\n"
20699"extern __inline long\n"
20700"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20701"__TM_end (void)\n"
20702"{\n"
20703" if (__builtin_expect (__builtin_tend (0), 1))\n"
20704" return 1;\n"
20705" return 0;\n"
20706"}\n"
20707"\n"
20708"extern __inline void\n"
20709"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20710"__TM_abort (void)\n"
20711"{\n"
20712" __builtin_tabort (0);\n"
20713"}\n"
20714"\n"
20715"extern __inline void\n"
20716"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20717"__TM_named_abort (unsigned char const __code)\n"
20718"{\n"
20719" __builtin_tabort (__code);\n"
20720"}\n"
20721"\n"
20722"extern __inline void\n"
20723"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20724"__TM_resume (void)\n"
20725"{\n"
20726" __builtin_tresume ();\n"
20727"}\n"
20728"\n"
20729"extern __inline void\n"
20730"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20731"__TM_suspend (void)\n"
20732"{\n"
20733" __builtin_tsuspend ();\n"
20734"}\n"
20735"\n"
20736"extern __inline long\n"
20737"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20738"__TM_is_user_abort (void* const __TM_buff)\n"
20739"{\n"
20740" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20741" return _TEXASRU_ABORT (texasru);\n"
20742"}\n"
20743"\n"
20744"extern __inline long\n"
20745"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20746"__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)\n"
20747"{\n"
20748" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20749"\n"
20750" *__code = _TEXASRU_FAILURE_CODE (texasru);\n"
20751" return _TEXASRU_ABORT (texasru);\n"
20752"}\n"
20753"\n"
20754"extern __inline long\n"
20755"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20756"__TM_is_illegal (void* const __TM_buff)\n"
20757"{\n"
20758" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20759" return _TEXASRU_DISALLOWED (texasru);\n"
20760"}\n"
20761"\n"
20762"extern __inline long\n"
20763"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20764"__TM_is_footprint_exceeded (void* const __TM_buff)\n"
20765"{\n"
20766" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20767" return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);\n"
20768"}\n"
20769"\n"
20770"extern __inline long\n"
20771"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20772"__TM_nesting_depth (void* const __TM_buff)\n"
20773"{\n"
20774" texasrl_t texasrl;\n"
20775"\n"
20776" if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)\n"
20777" {\n"
20778" texasrl = *_TEXASRL_PTR (__TM_buff);\n"
20779" if (!_TEXASR_FAILURE_SUMMARY (texasrl))\n"
20780" texasrl = 0;\n"
20781" }\n"
20782" else\n"
20783" texasrl = (texasrl_t) __builtin_get_texasr ();\n"
20784"\n"
20785" return _TEXASR_TRANSACTION_LEVEL (texasrl);\n"
20786"}\n"
20787"\n"
20788"extern __inline long\n"
20789"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20790"__TM_is_nested_too_deep(void* const __TM_buff)\n"
20791"{\n"
20792" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20793" return _TEXASRU_NESTING_OVERFLOW (texasru);\n"
20794"}\n"
20795"\n"
20796"extern __inline long\n"
20797"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20798"__TM_is_conflict(void* const __TM_buff)\n"
20799"{\n"
20800" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20801" /* Return TEXASR bits 11 (Self-Induced Conflict) through\n"
20802" 14 (Translation Invalidation Conflict). */\n"
20803" return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;\n"
20804"}\n"
20805"\n"
20806"extern __inline long\n"
20807"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20808"__TM_is_failure_persistent(void* const __TM_buff)\n"
20809"{\n"
20810" texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n"
20811" return _TEXASRU_FAILURE_PERSISTENT (texasru);\n"
20812"}\n"
20813"\n"
20814"extern __inline long\n"
20815"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20816"__TM_failure_address(void* const __TM_buff)\n"
20817"{\n"
20818" return *_TFIAR_PTR (__TM_buff);\n"
20819"}\n"
20820"\n"
20821"extern __inline long long\n"
20822"__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n"
20823"__TM_failure_code(void* const __TM_buff)\n"
20824"{\n"
20825" return *_TEXASR_PTR (__TM_buff);\n"
20826"}\n"
20827"\n"
20828"#ifdef __cplusplus\n"
20829"}\n"
20830"#endif\n"
20831"\n"
20832"#endif /* __powerpc__ */\n"
20833"\n"
20834"#ifdef __s390__\n"
20835"\n"
20836"#include <stdint.h>\n"
20837"\n"
20838"/* These intrinsics are being made available for compatibility with\n"
20839" the IBM XL compiler. For documentation please see the \"z/OS XL\n"
20840" C/C++ Programming Guide\" publicly available on the web. */\n"
20841"\n"
20842"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20843"__TM_simple_begin ()\n"
20844"{\n"
20845" return __builtin_tbegin_nofloat (0);\n"
20846"}\n"
20847"\n"
20848"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20849"__TM_begin (void* const __tdb)\n"
20850"{\n"
20851" return __builtin_tbegin_nofloat (__tdb);\n"
20852"}\n"
20853"\n"
20854"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20855"__TM_end ()\n"
20856"{\n"
20857" return __builtin_tend ();\n"
20858"}\n"
20859"\n"
20860"static __inline void __attribute__((__always_inline__))\n"
20861"__TM_abort ()\n"
20862"{\n"
20863" return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);\n"
20864"}\n"
20865"\n"
20866"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20867"__TM_named_abort (unsigned char const __code)\n"
20868"{\n"
20869" return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);\n"
20870"}\n"
20871"\n"
20872"static __inline void __attribute__((__always_inline__, __nodebug__))\n"
20873"__TM_non_transactional_store (void* const __addr, long long const __value)\n"
20874"{\n"
20875" __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);\n"
20876"}\n"
20877"\n"
20878"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20879"__TM_nesting_depth (void* const __tdb_ptr)\n"
20880"{\n"
20881" int depth = __builtin_tx_nesting_depth ();\n"
20882" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20883"\n"
20884" if (depth != 0)\n"
20885" return depth;\n"
20886"\n"
20887" if (tdb->format != 1)\n"
20888" return 0;\n"
20889" return tdb->nesting_depth;\n"
20890"}\n"
20891"\n"
20892"/* Transaction failure diagnostics */\n"
20893"\n"
20894"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20895"__TM_is_user_abort (void* const __tdb_ptr)\n"
20896"{\n"
20897" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20898"\n"
20899" if (tdb->format != 1)\n"
20900" return 0;\n"
20901"\n"
20902" return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);\n"
20903"}\n"
20904"\n"
20905"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20906"__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)\n"
20907"{\n"
20908" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20909"\n"
20910" if (tdb->format != 1)\n"
20911" return 0;\n"
20912"\n"
20913" if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)\n"
20914" {\n"
20915" *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;\n"
20916" return 1;\n"
20917" }\n"
20918" return 0;\n"
20919"}\n"
20920"\n"
20921"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20922"__TM_is_illegal (void* const __tdb_ptr)\n"
20923"{\n"
20924" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20925"\n"
20926" return (tdb->format == 1\n"
20927" && (tdb->abort_code == 4 /* unfiltered program interruption */\n"
20928" || tdb->abort_code == 11 /* restricted instruction */));\n"
20929"}\n"
20930"\n"
20931"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20932"__TM_is_footprint_exceeded (void* const __tdb_ptr)\n"
20933"{\n"
20934" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20935"\n"
20936" return (tdb->format == 1\n"
20937" && (tdb->abort_code == 7 /* fetch overflow */\n"
20938" || tdb->abort_code == 8 /* store overflow */));\n"
20939"}\n"
20940"\n"
20941"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20942"__TM_is_nested_too_deep (void* const __tdb_ptr)\n"
20943"{\n"
20944" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20945"\n"
20946" return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */\n"
20947"}\n"
20948"\n"
20949"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20950"__TM_is_conflict (void* const __tdb_ptr)\n"
20951"{\n"
20952" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20953"\n"
20954" return (tdb->format == 1\n"
20955" && (tdb->abort_code == 9 /* fetch conflict */\n"
20956" || tdb->abort_code == 10 /* store conflict */));\n"
20957"}\n"
20958"\n"
20959"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20960"__TM_is_failure_persistent (long const __result)\n"
20961"{\n"
20962" return __result == _HTM_TBEGIN_PERSISTENT;\n"
20963"}\n"
20964"\n"
20965"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20966"__TM_failure_address (void* const __tdb_ptr)\n"
20967"{\n"
20968" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20969" return tdb->atia;\n"
20970"}\n"
20971"\n"
20972"static __inline long __attribute__((__always_inline__, __nodebug__))\n"
20973"__TM_failure_code (void* const __tdb_ptr)\n"
20974"{\n"
20975" struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n"
20976"\n"
20977" return tdb->abort_code;\n"
20978"}\n"
20979"\n"
20980"#endif /* __s390__ */\n"
20981"\n"
20982"#endif /* __HTMXLINTRIN_H */\n"
20983"" } ,
20984 { "/builtins/ia32intrin.h" , "/* ===-------- ia32intrin.h ---------------------------------------------------===\n"
20985" *\n"
20986" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
20987" * of this software and associated documentation files (the \"Software\"), to deal\n"
20988" * in the Software without restriction, including without limitation the rights\n"
20989" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
20990" * copies of the Software, and to permit persons to whom the Software is\n"
20991" * furnished to do so, subject to the following conditions:\n"
20992" *\n"
20993" * The above copyright notice and this permission notice shall be included in\n"
20994" * all copies or substantial portions of the Software.\n"
20995" *\n"
20996" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
20997" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
20998" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
20999" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21000" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21001" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21002" * THE SOFTWARE.\n"
21003" *\n"
21004" *===-----------------------------------------------------------------------===\n"
21005" */\n"
21006"\n"
21007"#ifndef __X86INTRIN_H\n"
21008"#error \"Never use <ia32intrin.h> directly; include <x86intrin.h> instead.\"\n"
21009"#endif\n"
21010"\n"
21011"#ifndef __IA32INTRIN_H\n"
21012"#define __IA32INTRIN_H\n"
21013"\n"
21014"#ifdef __x86_64__\n"
21015"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21016"__readeflags(void)\n"
21017"{\n"
21018" return __builtin_ia32_readeflags_u64();\n"
21019"}\n"
21020"\n"
21021"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21022"__writeeflags(unsigned long long __f)\n"
21023"{\n"
21024" __builtin_ia32_writeeflags_u64(__f);\n"
21025"}\n"
21026"\n"
21027"#else /* !__x86_64__ */\n"
21028"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))\n"
21029"__readeflags(void)\n"
21030"{\n"
21031" return __builtin_ia32_readeflags_u32();\n"
21032"}\n"
21033"\n"
21034"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21035"__writeeflags(unsigned int __f)\n"
21036"{\n"
21037" __builtin_ia32_writeeflags_u32(__f);\n"
21038"}\n"
21039"#endif /* !__x86_64__ */\n"
21040"\n"
21041"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21042"__rdpmc(int __A) {\n"
21043" return __builtin_ia32_rdpmc(__A);\n"
21044"}\n"
21045"\n"
21046"/* __rdtscp */\n"
21047"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n"
21048"__rdtscp(unsigned int *__A) {\n"
21049" return __builtin_ia32_rdtscp(__A);\n"
21050"}\n"
21051"\n"
21052"#define _rdtsc() __rdtsc()\n"
21053"\n"
21054"#define _rdpmc(A) __rdpmc(A)\n"
21055"\n"
21056"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
21057"_wbinvd(void) {\n"
21058" __builtin_ia32_wbinvd();\n"
21059"}\n"
21060"\n"
21061"#endif /* __IA32INTRIN_H */\n"
21062"" } ,
21063 { "/builtins/immintrin.h" , "/*===---- immintrin.h - Intel intrinsics -----------------------------------===\n"
21064" *\n"
21065" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21066" * of this software and associated documentation files (the \"Software\"), to deal\n"
21067" * in the Software without restriction, including without limitation the rights\n"
21068" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21069" * copies of the Software, and to permit persons to whom the Software is\n"
21070" * furnished to do so, subject to the following conditions:\n"
21071" *\n"
21072" * The above copyright notice and this permission notice shall be included in\n"
21073" * all copies or substantial portions of the Software.\n"
21074" *\n"
21075" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21076" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21077" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21078" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21079" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21080" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21081" * THE SOFTWARE.\n"
21082" *\n"
21083" *===-----------------------------------------------------------------------===\n"
21084" */\n"
21085"\n"
21086"#ifndef __IMMINTRIN_H\n"
21087"#define __IMMINTRIN_H\n"
21088"\n"
21089"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)\n"
21090"#include <mmintrin.h>\n"
21091"#endif\n"
21092"\n"
21093"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)\n"
21094"#include <xmmintrin.h>\n"
21095"#endif\n"
21096"\n"
21097"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)\n"
21098"#include <emmintrin.h>\n"
21099"#endif\n"
21100"\n"
21101"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)\n"
21102"#include <pmmintrin.h>\n"
21103"#endif\n"
21104"\n"
21105"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)\n"
21106"#include <tmmintrin.h>\n"
21107"#endif\n"
21108"\n"
21109"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21110" (defined(__SSE4_2__) || defined(__SSE4_1__))\n"
21111"#include <smmintrin.h>\n"
21112"#endif\n"
21113"\n"
21114"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21115" (defined(__AES__) || defined(__PCLMUL__))\n"
21116"#include <wmmintrin.h>\n"
21117"#endif\n"
21118"\n"
21119"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)\n"
21120"#include <clflushoptintrin.h>\n"
21121"#endif\n"
21122"\n"
21123"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)\n"
21124"#include <clwbintrin.h>\n"
21125"#endif\n"
21126"\n"
21127"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)\n"
21128"#include <avxintrin.h>\n"
21129"#endif\n"
21130"\n"
21131"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)\n"
21132"#include <avx2intrin.h>\n"
21133"#endif\n"
21134"\n"
21135"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)\n"
21136"#include <f16cintrin.h>\n"
21137"#endif\n"
21138"\n"
21139"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)\n"
21140"#include <vpclmulqdqintrin.h>\n"
21141"#endif\n"
21142"\n"
21143"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)\n"
21144"#include <bmiintrin.h>\n"
21145"#endif\n"
21146"\n"
21147"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)\n"
21148"#include <bmi2intrin.h>\n"
21149"#endif\n"
21150"\n"
21151"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)\n"
21152"#include <lzcntintrin.h>\n"
21153"#endif\n"
21154"\n"
21155"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)\n"
21156"#include <popcntintrin.h>\n"
21157"#endif\n"
21158"\n"
21159"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)\n"
21160"#include <fmaintrin.h>\n"
21161"#endif\n"
21162"\n"
21163"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)\n"
21164"#include <avx512fintrin.h>\n"
21165"#endif\n"
21166"\n"
21167"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)\n"
21168"#include <avx512vlintrin.h>\n"
21169"#endif\n"
21170"\n"
21171"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)\n"
21172"#include <avx512bwintrin.h>\n"
21173"#endif\n"
21174"\n"
21175"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)\n"
21176"#include <avx512bitalgintrin.h>\n"
21177"#endif\n"
21178"\n"
21179"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)\n"
21180"#include <avx512cdintrin.h>\n"
21181"#endif\n"
21182"\n"
21183"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)\n"
21184"#include <avx512vpopcntdqintrin.h>\n"
21185"#endif\n"
21186"\n"
21187"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21188" (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))\n"
21189"#include <avx512vpopcntdqvlintrin.h>\n"
21190"#endif\n"
21191"\n"
21192"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)\n"
21193"#include <avx512vnniintrin.h>\n"
21194"#endif\n"
21195"\n"
21196"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21197" (defined(__AVX512VL__) && defined(__AVX512VNNI__))\n"
21198"#include <avx512vlvnniintrin.h>\n"
21199"#endif\n"
21200"\n"
21201"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)\n"
21202"#include <avx512dqintrin.h>\n"
21203"#endif\n"
21204"\n"
21205"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21206" (defined(__AVX512VL__) && defined(__AVX512BITALG__))\n"
21207"#include <avx512vlbitalgintrin.h>\n"
21208"#endif\n"
21209"\n"
21210"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21211" (defined(__AVX512VL__) && defined(__AVX512BW__))\n"
21212"#include <avx512vlbwintrin.h>\n"
21213"#endif\n"
21214"\n"
21215"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21216" (defined(__AVX512VL__) && defined(__AVX512CD__))\n"
21217"#include <avx512vlcdintrin.h>\n"
21218"#endif\n"
21219"\n"
21220"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21221" (defined(__AVX512VL__) && defined(__AVX512DQ__))\n"
21222"#include <avx512vldqintrin.h>\n"
21223"#endif\n"
21224"\n"
21225"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)\n"
21226"#include <avx512erintrin.h>\n"
21227"#endif\n"
21228"\n"
21229"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)\n"
21230"#include <avx512ifmaintrin.h>\n"
21231"#endif\n"
21232"\n"
21233"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21234" (defined(__AVX512IFMA__) && defined(__AVX512VL__))\n"
21235"#include <avx512ifmavlintrin.h>\n"
21236"#endif\n"
21237"\n"
21238"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)\n"
21239"#include <avx512vbmiintrin.h>\n"
21240"#endif\n"
21241"\n"
21242"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21243" (defined(__AVX512VBMI__) && defined(__AVX512VL__))\n"
21244"#include <avx512vbmivlintrin.h>\n"
21245"#endif\n"
21246"\n"
21247"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)\n"
21248"#include <avx512vbmi2intrin.h>\n"
21249"#endif\n"
21250"\n"
21251"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21252" (defined(__AVX512VBMI2__) && defined(__AVX512VL__))\n"
21253"#include <avx512vlvbmi2intrin.h>\n"
21254"#endif\n"
21255"\n"
21256"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)\n"
21257"#include <avx512pfintrin.h>\n"
21258"#endif\n"
21259"\n"
21260"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)\n"
21261"#include <pkuintrin.h>\n"
21262"#endif\n"
21263"\n"
21264"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)\n"
21265"#include <vaesintrin.h>\n"
21266"#endif\n"
21267"\n"
21268"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)\n"
21269"#include <gfniintrin.h>\n"
21270"#endif\n"
21271"\n"
21272"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)\n"
21273"/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).\n"
21274"///\n"
21275"/// \\headerfile <immintrin.h>\n"
21276"///\n"
21277"/// This intrinsic corresponds to the <c> RDPID </c> instruction.\n"
21278"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"rdpid\")))\n"
21279"_rdpid_u32(void) {\n"
21280" return __builtin_ia32_rdpid();\n"
21281"}\n"
21282"#endif // __RDPID__\n"
21283"\n"
21284"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)\n"
21285"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21286"_rdrand16_step(unsigned short *__p)\n"
21287"{\n"
21288" return __builtin_ia32_rdrand16_step(__p);\n"
21289"}\n"
21290"\n"
21291"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21292"_rdrand32_step(unsigned int *__p)\n"
21293"{\n"
21294" return __builtin_ia32_rdrand32_step(__p);\n"
21295"}\n"
21296"\n"
21297"#ifdef __x86_64__\n"
21298"static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n"
21299"_rdrand64_step(unsigned long long *__p)\n"
21300"{\n"
21301" return __builtin_ia32_rdrand64_step(__p);\n"
21302"}\n"
21303"#endif\n"
21304"#endif /* __RDRND__ */\n"
21305"\n"
21306"/* __bit_scan_forward */\n"
21307"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21308"_bit_scan_forward(int __A) {\n"
21309" return __builtin_ctz(__A);\n"
21310"}\n"
21311"\n"
21312"/* __bit_scan_reverse */\n"
21313"static __inline__ int __attribute__((__always_inline__, __nodebug__))\n"
21314"_bit_scan_reverse(int __A) {\n"
21315" return 31 - __builtin_clz(__A);\n"
21316"}\n"
21317"\n"
21318"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)\n"
21319"#ifdef __x86_64__\n"
21320"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21321"_readfsbase_u32(void)\n"
21322"{\n"
21323" return __builtin_ia32_rdfsbase32();\n"
21324"}\n"
21325"\n"
21326"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21327"_readfsbase_u64(void)\n"
21328"{\n"
21329" return __builtin_ia32_rdfsbase64();\n"
21330"}\n"
21331"\n"
21332"static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21333"_readgsbase_u32(void)\n"
21334"{\n"
21335" return __builtin_ia32_rdgsbase32();\n"
21336"}\n"
21337"\n"
21338"static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21339"_readgsbase_u64(void)\n"
21340"{\n"
21341" return __builtin_ia32_rdgsbase64();\n"
21342"}\n"
21343"\n"
21344"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21345"_writefsbase_u32(unsigned int __V)\n"
21346"{\n"
21347" __builtin_ia32_wrfsbase32(__V);\n"
21348"}\n"
21349"\n"
21350"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21351"_writefsbase_u64(unsigned long long __V)\n"
21352"{\n"
21353" __builtin_ia32_wrfsbase64(__V);\n"
21354"}\n"
21355"\n"
21356"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21357"_writegsbase_u32(unsigned int __V)\n"
21358"{\n"
21359" __builtin_ia32_wrgsbase32(__V);\n"
21360"}\n"
21361"\n"
21362"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n"
21363"_writegsbase_u64(unsigned long long __V)\n"
21364"{\n"
21365" __builtin_ia32_wrgsbase64(__V);\n"
21366"}\n"
21367"\n"
21368"#endif\n"
21369"#endif /* __FSGSBASE__ */\n"
21370"\n"
21371"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)\n"
21372"#include <rtmintrin.h>\n"
21373"#include <xtestintrin.h>\n"
21374"#endif\n"
21375"\n"
21376"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)\n"
21377"#include <shaintrin.h>\n"
21378"#endif\n"
21379"\n"
21380"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)\n"
21381"#include <fxsrintrin.h>\n"
21382"#endif\n"
21383"\n"
21384"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)\n"
21385"#include <xsaveintrin.h>\n"
21386"#endif\n"
21387"\n"
21388"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)\n"
21389"#include <xsaveoptintrin.h>\n"
21390"#endif\n"
21391"\n"
21392"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)\n"
21393"#include <xsavecintrin.h>\n"
21394"#endif\n"
21395"\n"
21396"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)\n"
21397"#include <xsavesintrin.h>\n"
21398"#endif\n"
21399"\n"
21400"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)\n"
21401"#include <cetintrin.h>\n"
21402"#endif\n"
21403"\n"
21404"/* Some intrinsics inside adxintrin.h are available only on processors with ADX,\n"
21405" * whereas others are also available at all times. */\n"
21406"#include <adxintrin.h>\n"
21407"\n"
21408"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)\n"
21409"#include <rdseedintrin.h>\n"
21410"#endif\n"
21411"\n"
21412"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)\n"
21413"#include <wbnoinvdintrin.h>\n"
21414"#endif\n"
21415"\n"
21416"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)\n"
21417"#include <cldemoteintrin.h>\n"
21418"#endif\n"
21419"\n"
21420"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)\n"
21421"#include <waitpkgintrin.h>\n"
21422"#endif\n"
21423"\n"
21424"#if !defined(_MSC_VER) || __has_feature(modules) || \\\n"
21425" defined(__MOVDIRI__) || defined(__MOVDIR64B__)\n"
21426"#include <movdirintrin.h>\n"
21427"#endif\n"
21428"\n"
21429"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)\n"
21430"#include <pconfigintrin.h>\n"
21431"#endif\n"
21432"\n"
21433"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)\n"
21434"#include <sgxintrin.h>\n"
21435"#endif\n"
21436"\n"
21437"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)\n"
21438"#include <ptwriteintrin.h>\n"
21439"#endif\n"
21440"\n"
21441"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)\n"
21442"#include <invpcidintrin.h>\n"
21443"#endif\n"
21444"\n"
21445"#ifdef _MSC_VER\n"
21446"/* Define the default attributes for these intrinsics */\n"
21447"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21448"#ifdef __cplusplus\n"
21449"extern \"C\" {\n"
21450"#endif\n"
21451"/*----------------------------------------------------------------------------*\\\n"
21452"|* Interlocked Exchange HLE\n"
21453"\\*----------------------------------------------------------------------------*/\n"
21454"#if defined(__i386__) || defined(__x86_64__)\n"
21455"static __inline__ long __DEFAULT_FN_ATTRS\n"
21456"_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {\n"
21457" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21458" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21459" return _Value;\n"
21460"}\n"
21461"static __inline__ long __DEFAULT_FN_ATTRS\n"
21462"_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {\n"
21463" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21464" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21465" return _Value;\n"
21466"}\n"
21467"#endif\n"
21468"#if defined(__x86_64__)\n"
21469"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21470"_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {\n"
21471" __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n"
21472" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21473" return _Value;\n"
21474"}\n"
21475"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21476"_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {\n"
21477" __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n"
21478" : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n"
21479" return _Value;\n"
21480"}\n"
21481"#endif\n"
21482"/*----------------------------------------------------------------------------*\\\n"
21483"|* Interlocked Compare Exchange HLE\n"
21484"\\*----------------------------------------------------------------------------*/\n"
21485"#if defined(__i386__) || defined(__x86_64__)\n"
21486"static __inline__ long __DEFAULT_FN_ATTRS\n"
21487"_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,\n"
21488" long _Exchange, long _Comparand) {\n"
21489" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21490" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21491" : \"r\" (_Exchange) : \"memory\");\n"
21492" return _Comparand;\n"
21493"}\n"
21494"static __inline__ long __DEFAULT_FN_ATTRS\n"
21495"_InterlockedCompareExchange_HLERelease(long volatile *_Destination,\n"
21496" long _Exchange, long _Comparand) {\n"
21497" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21498" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21499" : \"r\" (_Exchange) : \"memory\");\n"
21500" return _Comparand;\n"
21501"}\n"
21502"#endif\n"
21503"#if defined(__x86_64__)\n"
21504"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21505"_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,\n"
21506" __int64 _Exchange, __int64 _Comparand) {\n"
21507" __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n"
21508" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21509" : \"r\" (_Exchange) : \"memory\");\n"
21510" return _Comparand;\n"
21511"}\n"
21512"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21513"_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,\n"
21514" __int64 _Exchange, __int64 _Comparand) {\n"
21515" __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n"
21516" : \"+a\" (_Comparand), \"+m\" (*_Destination)\n"
21517" : \"r\" (_Exchange) : \"memory\");\n"
21518" return _Comparand;\n"
21519"}\n"
21520"#endif\n"
21521"#ifdef __cplusplus\n"
21522"}\n"
21523"#endif\n"
21524"\n"
21525"#undef __DEFAULT_FN_ATTRS\n"
21526"\n"
21527"#endif /* _MSC_VER */\n"
21528"\n"
21529"#endif /* __IMMINTRIN_H */\n"
21530"" } ,
21531 { "/builtins/intrin.h" , "/* ===-------- intrin.h ---------------------------------------------------===\n"
21532" *\n"
21533" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
21534" * of this software and associated documentation files (the \"Software\"), to deal\n"
21535" * in the Software without restriction, including without limitation the rights\n"
21536" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
21537" * copies of the Software, and to permit persons to whom the Software is\n"
21538" * furnished to do so, subject to the following conditions:\n"
21539" *\n"
21540" * The above copyright notice and this permission notice shall be included in\n"
21541" * all copies or substantial portions of the Software.\n"
21542" *\n"
21543" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
21544" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
21545" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
21546" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
21547" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
21548" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
21549" * THE SOFTWARE.\n"
21550" *\n"
21551" *===-----------------------------------------------------------------------===\n"
21552" */\n"
21553"\n"
21554"/* Only include this if we're compiling for the windows platform. */\n"
21555"#ifndef _MSC_VER\n"
21556"#include_next <intrin.h>\n"
21557"#else\n"
21558"\n"
21559"#ifndef __INTRIN_H\n"
21560"#define __INTRIN_H\n"
21561"\n"
21562"/* First include the standard intrinsics. */\n"
21563"#if defined(__i386__) || defined(__x86_64__)\n"
21564"#include <x86intrin.h>\n"
21565"#endif\n"
21566"\n"
21567"#if defined(__arm__)\n"
21568"#include <armintr.h>\n"
21569"#endif\n"
21570"\n"
21571"#if defined(__aarch64__)\n"
21572"#include <arm64intr.h>\n"
21573"#endif\n"
21574"\n"
21575"/* For the definition of jmp_buf. */\n"
21576"#if __STDC_HOSTED__\n"
21577"#include <setjmp.h>\n"
21578"#endif\n"
21579"\n"
21580"/* Define the default attributes for the functions in this file. */\n"
21581"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n"
21582"\n"
21583"#ifdef __cplusplus\n"
21584"extern \"C\" {\n"
21585"#endif\n"
21586"\n"
21587"#if defined(__MMX__)\n"
21588"/* And the random ones that aren't in those files. */\n"
21589"__m64 _m_from_float(float);\n"
21590"float _m_to_float(__m64);\n"
21591"#endif\n"
21592"\n"
21593"/* Other assorted instruction intrinsics. */\n"
21594"void __addfsbyte(unsigned long, unsigned char);\n"
21595"void __addfsdword(unsigned long, unsigned long);\n"
21596"void __addfsword(unsigned long, unsigned short);\n"
21597"void __code_seg(const char *);\n"
21598"static __inline__\n"
21599"void __cpuid(int[4], int);\n"
21600"static __inline__\n"
21601"void __cpuidex(int[4], int, int);\n"
21602"static __inline__\n"
21603"__int64 __emul(int, int);\n"
21604"static __inline__\n"
21605"unsigned __int64 __emulu(unsigned int, unsigned int);\n"
21606"unsigned int __getcallerseflags(void);\n"
21607"static __inline__\n"
21608"void __halt(void);\n"
21609"unsigned char __inbyte(unsigned short);\n"
21610"void __inbytestring(unsigned short, unsigned char *, unsigned long);\n"
21611"void __incfsbyte(unsigned long);\n"
21612"void __incfsdword(unsigned long);\n"
21613"void __incfsword(unsigned long);\n"
21614"unsigned long __indword(unsigned short);\n"
21615"void __indwordstring(unsigned short, unsigned long *, unsigned long);\n"
21616"void __int2c(void);\n"
21617"void __invlpg(void *);\n"
21618"unsigned short __inword(unsigned short);\n"
21619"void __inwordstring(unsigned short, unsigned short *, unsigned long);\n"
21620"void __lidt(void *);\n"
21621"unsigned __int64 __ll_lshift(unsigned __int64, int);\n"
21622"__int64 __ll_rshift(__int64, int);\n"
21623"unsigned int __lzcnt(unsigned int);\n"
21624"unsigned short __lzcnt16(unsigned short);\n"
21625"static __inline__\n"
21626"void __movsb(unsigned char *, unsigned char const *, size_t);\n"
21627"static __inline__\n"
21628"void __movsd(unsigned long *, unsigned long const *, size_t);\n"
21629"static __inline__\n"
21630"void __movsw(unsigned short *, unsigned short const *, size_t);\n"
21631"static __inline__\n"
21632"void __nop(void);\n"
21633"void __nvreg_restore_fence(void);\n"
21634"void __nvreg_save_fence(void);\n"
21635"void __outbyte(unsigned short, unsigned char);\n"
21636"void __outbytestring(unsigned short, unsigned char *, unsigned long);\n"
21637"void __outdword(unsigned short, unsigned long);\n"
21638"void __outdwordstring(unsigned short, unsigned long *, unsigned long);\n"
21639"void __outword(unsigned short, unsigned short);\n"
21640"void __outwordstring(unsigned short, unsigned short *, unsigned long);\n"
21641"unsigned long __readcr0(void);\n"
21642"unsigned long __readcr2(void);\n"
21643"static __inline__\n"
21644"unsigned long __readcr3(void);\n"
21645"unsigned long __readcr4(void);\n"
21646"unsigned long __readcr8(void);\n"
21647"unsigned int __readdr(unsigned int);\n"
21648"#ifdef __i386__\n"
21649"static __inline__\n"
21650"unsigned char __readfsbyte(unsigned long);\n"
21651"static __inline__\n"
21652"unsigned __int64 __readfsqword(unsigned long);\n"
21653"static __inline__\n"
21654"unsigned short __readfsword(unsigned long);\n"
21655"#endif\n"
21656"static __inline__\n"
21657"unsigned __int64 __readmsr(unsigned long);\n"
21658"unsigned __int64 __readpmc(unsigned long);\n"
21659"unsigned long __segmentlimit(unsigned long);\n"
21660"void __sidt(void *);\n"
21661"static __inline__\n"
21662"void __stosb(unsigned char *, unsigned char, size_t);\n"
21663"static __inline__\n"
21664"void __stosd(unsigned long *, unsigned long, size_t);\n"
21665"static __inline__\n"
21666"void __stosw(unsigned short *, unsigned short, size_t);\n"
21667"void __svm_clgi(void);\n"
21668"void __svm_invlpga(void *, int);\n"
21669"void __svm_skinit(int);\n"
21670"void __svm_stgi(void);\n"
21671"void __svm_vmload(size_t);\n"
21672"void __svm_vmrun(size_t);\n"
21673"void __svm_vmsave(size_t);\n"
21674"void __ud2(void);\n"
21675"unsigned __int64 __ull_rshift(unsigned __int64, int);\n"
21676"void __vmx_off(void);\n"
21677"void __vmx_vmptrst(unsigned __int64 *);\n"
21678"void __wbinvd(void);\n"
21679"void __writecr0(unsigned int);\n"
21680"static __inline__\n"
21681"void __writecr3(unsigned int);\n"
21682"void __writecr4(unsigned int);\n"
21683"void __writecr8(unsigned int);\n"
21684"void __writedr(unsigned int, unsigned int);\n"
21685"void __writefsbyte(unsigned long, unsigned char);\n"
21686"void __writefsdword(unsigned long, unsigned long);\n"
21687"void __writefsqword(unsigned long, unsigned __int64);\n"
21688"void __writefsword(unsigned long, unsigned short);\n"
21689"void __writemsr(unsigned long, unsigned __int64);\n"
21690"static __inline__\n"
21691"void *_AddressOfReturnAddress(void);\n"
21692"static __inline__\n"
21693"unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);\n"
21694"static __inline__\n"
21695"unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);\n"
21696"unsigned char _bittest(long const *, long);\n"
21697"unsigned char _bittestandcomplement(long *, long);\n"
21698"unsigned char _bittestandreset(long *, long);\n"
21699"unsigned char _bittestandset(long *, long);\n"
21700"void __cdecl _disable(void);\n"
21701"void __cdecl _enable(void);\n"
21702"long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);\n"
21703"unsigned char _interlockedbittestandreset(long volatile *, long);\n"
21704"unsigned char _interlockedbittestandset(long volatile *, long);\n"
21705"void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,\n"
21706" void *);\n"
21707"void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,\n"
21708" void *);\n"
21709"long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);\n"
21710"long _InterlockedExchangeAdd_HLERelease(long volatile *, long);\n"
21711"__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);\n"
21712"__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);\n"
21713"void __cdecl _invpcid(unsigned int, void *);\n"
21714"static __inline__ void\n"
21715"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21716"_ReadBarrier(void);\n"
21717"static __inline__ void\n"
21718"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21719"_ReadWriteBarrier(void);\n"
21720"unsigned int _rorx_u32(unsigned int, const unsigned int);\n"
21721"int _sarx_i32(int, unsigned int);\n"
21722"#if __STDC_HOSTED__\n"
21723"int __cdecl _setjmp(jmp_buf);\n"
21724"#endif\n"
21725"unsigned int _shlx_u32(unsigned int, unsigned int);\n"
21726"unsigned int _shrx_u32(unsigned int, unsigned int);\n"
21727"void _Store_HLERelease(long volatile *, long);\n"
21728"void _Store64_HLERelease(__int64 volatile *, __int64);\n"
21729"void _StorePointer_HLERelease(void *volatile *, void *);\n"
21730"static __inline__ void\n"
21731"__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n"
21732"_WriteBarrier(void);\n"
21733"unsigned __int32 xbegin(void);\n"
21734"void _xend(void);\n"
21735"static __inline__\n"
21736"#define _XCR_XFEATURE_ENABLED_MASK 0\n"
21737"unsigned __int64 __cdecl _xgetbv(unsigned int);\n"
21738"void __cdecl _xsetbv(unsigned int, unsigned __int64);\n"
21739"\n"
21740"/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */\n"
21741"#ifdef __x86_64__\n"
21742"void __addgsbyte(unsigned long, unsigned char);\n"
21743"void __addgsdword(unsigned long, unsigned long);\n"
21744"void __addgsqword(unsigned long, unsigned __int64);\n"
21745"void __addgsword(unsigned long, unsigned short);\n"
21746"static __inline__\n"
21747"void __faststorefence(void);\n"
21748"void __incgsbyte(unsigned long);\n"
21749"void __incgsdword(unsigned long);\n"
21750"void __incgsqword(unsigned long);\n"
21751"void __incgsword(unsigned long);\n"
21752"unsigned __int64 __lzcnt64(unsigned __int64);\n"
21753"static __inline__\n"
21754"void __movsq(unsigned long long *, unsigned long long const *, size_t);\n"
21755"static __inline__\n"
21756"unsigned char __readgsbyte(unsigned long);\n"
21757"static __inline__\n"
21758"unsigned long __readgsdword(unsigned long);\n"
21759"static __inline__\n"
21760"unsigned __int64 __readgsqword(unsigned long);\n"
21761"unsigned short __readgsword(unsigned long);\n"
21762"unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,\n"
21763" unsigned __int64 _HighPart,\n"
21764" unsigned char _Shift);\n"
21765"unsigned __int64 __shiftright128(unsigned __int64 _LowPart,\n"
21766" unsigned __int64 _HighPart,\n"
21767" unsigned char _Shift);\n"
21768"static __inline__\n"
21769"void __stosq(unsigned __int64 *, unsigned __int64, size_t);\n"
21770"unsigned char __vmx_on(unsigned __int64 *);\n"
21771"unsigned char __vmx_vmclear(unsigned __int64 *);\n"
21772"unsigned char __vmx_vmlaunch(void);\n"
21773"unsigned char __vmx_vmptrld(unsigned __int64 *);\n"
21774"unsigned char __vmx_vmread(size_t, size_t *);\n"
21775"unsigned char __vmx_vmresume(void);\n"
21776"unsigned char __vmx_vmwrite(size_t, size_t);\n"
21777"void __writegsbyte(unsigned long, unsigned char);\n"
21778"void __writegsdword(unsigned long, unsigned long);\n"
21779"void __writegsqword(unsigned long, unsigned __int64);\n"
21780"void __writegsword(unsigned long, unsigned short);\n"
21781"unsigned char _bittest64(__int64 const *, __int64);\n"
21782"unsigned char _bittestandcomplement64(__int64 *, __int64);\n"
21783"unsigned char _bittestandreset64(__int64 *, __int64);\n"
21784"unsigned char _bittestandset64(__int64 *, __int64);\n"
21785"long _InterlockedAnd_np(long volatile *_Value, long _Mask);\n"
21786"short _InterlockedAnd16_np(short volatile *_Value, short _Mask);\n"
21787"__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21788"char _InterlockedAnd8_np(char volatile *_Value, char _Mask);\n"
21789"unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);\n"
21790"unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);\n"
21791"long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,\n"
21792" long _Comparand);\n"
21793"unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,\n"
21794" __int64 _ExchangeHigh,\n"
21795" __int64 _ExchangeLow,\n"
21796" __int64 *_CompareandResult);\n"
21797"unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,\n"
21798" __int64 _ExchangeHigh,\n"
21799" __int64 _ExchangeLow,\n"
21800" __int64 *_ComparandResult);\n"
21801"short _InterlockedCompareExchange16_np(short volatile *_Destination,\n"
21802" short _Exchange, short _Comparand);\n"
21803"__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,\n"
21804" __int64 _Exchange, __int64 _Comparand);\n"
21805"void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,\n"
21806" void *_Exchange, void *_Comparand);\n"
21807"long _InterlockedOr_np(long volatile *_Value, long _Mask);\n"
21808"short _InterlockedOr16_np(short volatile *_Value, short _Mask);\n"
21809"__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21810"char _InterlockedOr8_np(char volatile *_Value, char _Mask);\n"
21811"long _InterlockedXor_np(long volatile *_Value, long _Mask);\n"
21812"short _InterlockedXor16_np(short volatile *_Value, short _Mask);\n"
21813"__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);\n"
21814"char _InterlockedXor8_np(char volatile *_Value, char _Mask);\n"
21815"unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);\n"
21816"__int64 _sarx_i64(__int64, unsigned int);\n"
21817"unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);\n"
21818"unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);\n"
21819"static __inline__\n"
21820"__int64 __mulh(__int64, __int64);\n"
21821"static __inline__\n"
21822"unsigned __int64 __umulh(unsigned __int64, unsigned __int64);\n"
21823"static __inline__\n"
21824"__int64 _mul128(__int64, __int64, __int64*);\n"
21825"static __inline__\n"
21826"unsigned __int64 _umul128(unsigned __int64,\n"
21827" unsigned __int64,\n"
21828" unsigned __int64*);\n"
21829"\n"
21830"#endif /* __x86_64__ */\n"
21831"\n"
21832"#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)\n"
21833"\n"
21834"static __inline__\n"
21835"unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);\n"
21836"static __inline__\n"
21837"unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);\n"
21838"\n"
21839"static __inline__\n"
21840"__int64 _InterlockedDecrement64(__int64 volatile *_Addend);\n"
21841"static __inline__\n"
21842"__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);\n"
21843"static __inline__\n"
21844"__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);\n"
21845"static __inline__\n"
21846"__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);\n"
21847"static __inline__\n"
21848"__int64 _InterlockedIncrement64(__int64 volatile *_Addend);\n"
21849"static __inline__\n"
21850"__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);\n"
21851"static __inline__\n"
21852"__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);\n"
21853"static __inline__\n"
21854"__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);\n"
21855"\n"
21856"#endif\n"
21857"\n"
21858"/*----------------------------------------------------------------------------*\\\n"
21859"|* Interlocked Exchange Add\n"
21860"\\*----------------------------------------------------------------------------*/\n"
21861"#if defined(__arm__) || defined(__aarch64__)\n"
21862"static __inline__ char __DEFAULT_FN_ATTRS\n"
21863"_InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value) {\n"
21864" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n"
21865"}\n"
21866"static __inline__ char __DEFAULT_FN_ATTRS\n"
21867"_InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value) {\n"
21868" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n"
21869"}\n"
21870"static __inline__ char __DEFAULT_FN_ATTRS\n"
21871"_InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value) {\n"
21872" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n"
21873"}\n"
21874"static __inline__ short __DEFAULT_FN_ATTRS\n"
21875"_InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value) {\n"
21876" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n"
21877"}\n"
21878"static __inline__ short __DEFAULT_FN_ATTRS\n"
21879"_InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value) {\n"
21880" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n"
21881"}\n"
21882"static __inline__ short __DEFAULT_FN_ATTRS\n"
21883"_InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value) {\n"
21884" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n"
21885"}\n"
21886"static __inline__ long __DEFAULT_FN_ATTRS\n"
21887"_InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value) {\n"
21888" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n"
21889"}\n"
21890"static __inline__ long __DEFAULT_FN_ATTRS\n"
21891"_InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value) {\n"
21892" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n"
21893"}\n"
21894"static __inline__ long __DEFAULT_FN_ATTRS\n"
21895"_InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value) {\n"
21896" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n"
21897"}\n"
21898"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21899"_InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value) {\n"
21900" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n"
21901"}\n"
21902"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21903"_InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value) {\n"
21904" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n"
21905"}\n"
21906"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21907"_InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value) {\n"
21908" return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n"
21909"}\n"
21910"#endif\n"
21911"/*----------------------------------------------------------------------------*\\\n"
21912"|* Interlocked Increment\n"
21913"\\*----------------------------------------------------------------------------*/\n"
21914"#if defined(__arm__) || defined(__aarch64__)\n"
21915"static __inline__ short __DEFAULT_FN_ATTRS\n"
21916"_InterlockedIncrement16_acq(short volatile *_Value) {\n"
21917" return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21918"}\n"
21919"static __inline__ short __DEFAULT_FN_ATTRS\n"
21920"_InterlockedIncrement16_nf(short volatile *_Value) {\n"
21921" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21922"}\n"
21923"static __inline__ short __DEFAULT_FN_ATTRS\n"
21924"_InterlockedIncrement16_rel(short volatile *_Value) {\n"
21925" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21926"}\n"
21927"static __inline__ long __DEFAULT_FN_ATTRS\n"
21928"_InterlockedIncrement_acq(long volatile *_Value) {\n"
21929" return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21930"}\n"
21931"static __inline__ long __DEFAULT_FN_ATTRS\n"
21932"_InterlockedIncrement_nf(long volatile *_Value) {\n"
21933" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21934"}\n"
21935"static __inline__ long __DEFAULT_FN_ATTRS\n"
21936"_InterlockedIncrement_rel(long volatile *_Value) {\n"
21937" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21938"}\n"
21939"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21940"_InterlockedIncrement64_acq(__int64 volatile *_Value) {\n"
21941" return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21942"}\n"
21943"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21944"_InterlockedIncrement64_nf(__int64 volatile *_Value) {\n"
21945" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21946"}\n"
21947"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21948"_InterlockedIncrement64_rel(__int64 volatile *_Value) {\n"
21949" return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21950"}\n"
21951"#endif\n"
21952"/*----------------------------------------------------------------------------*\\\n"
21953"|* Interlocked Decrement\n"
21954"\\*----------------------------------------------------------------------------*/\n"
21955"#if defined(__arm__) || defined(__aarch64__)\n"
21956"static __inline__ short __DEFAULT_FN_ATTRS\n"
21957"_InterlockedDecrement16_acq(short volatile *_Value) {\n"
21958" return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21959"}\n"
21960"static __inline__ short __DEFAULT_FN_ATTRS\n"
21961"_InterlockedDecrement16_nf(short volatile *_Value) {\n"
21962" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21963"}\n"
21964"static __inline__ short __DEFAULT_FN_ATTRS\n"
21965"_InterlockedDecrement16_rel(short volatile *_Value) {\n"
21966" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21967"}\n"
21968"static __inline__ long __DEFAULT_FN_ATTRS\n"
21969"_InterlockedDecrement_acq(long volatile *_Value) {\n"
21970" return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21971"}\n"
21972"static __inline__ long __DEFAULT_FN_ATTRS\n"
21973"_InterlockedDecrement_nf(long volatile *_Value) {\n"
21974" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21975"}\n"
21976"static __inline__ long __DEFAULT_FN_ATTRS\n"
21977"_InterlockedDecrement_rel(long volatile *_Value) {\n"
21978" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21979"}\n"
21980"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21981"_InterlockedDecrement64_acq(__int64 volatile *_Value) {\n"
21982" return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n"
21983"}\n"
21984"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21985"_InterlockedDecrement64_nf(__int64 volatile *_Value) {\n"
21986" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n"
21987"}\n"
21988"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
21989"_InterlockedDecrement64_rel(__int64 volatile *_Value) {\n"
21990" return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n"
21991"}\n"
21992"#endif\n"
21993"/*----------------------------------------------------------------------------*\\\n"
21994"|* Interlocked And\n"
21995"\\*----------------------------------------------------------------------------*/\n"
21996"#if defined(__arm__) || defined(__aarch64__)\n"
21997"static __inline__ char __DEFAULT_FN_ATTRS\n"
21998"_InterlockedAnd8_acq(char volatile *_Value, char _Mask) {\n"
21999" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22000"}\n"
22001"static __inline__ char __DEFAULT_FN_ATTRS\n"
22002"_InterlockedAnd8_nf(char volatile *_Value, char _Mask) {\n"
22003" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n"
22004"}\n"
22005"static __inline__ char __DEFAULT_FN_ATTRS\n"
22006"_InterlockedAnd8_rel(char volatile *_Value, char _Mask) {\n"
22007" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n"
22008"}\n"
22009"static __inline__ short __DEFAULT_FN_ATTRS\n"
22010"_InterlockedAnd16_acq(short volatile *_Value, short _Mask) {\n"
22011" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22012"}\n"
22013"static __inline__ short __DEFAULT_FN_ATTRS\n"
22014"_InterlockedAnd16_nf(short volatile *_Value, short _Mask) {\n"
22015" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n"
22016"}\n"
22017"static __inline__ short __DEFAULT_FN_ATTRS\n"
22018"_InterlockedAnd16_rel(short volatile *_Value, short _Mask) {\n"
22019" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n"
22020"}\n"
22021"static __inline__ long __DEFAULT_FN_ATTRS\n"
22022"_InterlockedAnd_acq(long volatile *_Value, long _Mask) {\n"
22023" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22024"}\n"
22025"static __inline__ long __DEFAULT_FN_ATTRS\n"
22026"_InterlockedAnd_nf(long volatile *_Value, long _Mask) {\n"
22027" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n"
22028"}\n"
22029"static __inline__ long __DEFAULT_FN_ATTRS\n"
22030"_InterlockedAnd_rel(long volatile *_Value, long _Mask) {\n"
22031" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n"
22032"}\n"
22033"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22034"_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask) {\n"
22035" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22036"}\n"
22037"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22038"_InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask) {\n"
22039" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n"
22040"}\n"
22041"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22042"_InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {\n"
22043" return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n"
22044"}\n"
22045"#endif\n"
22046"/*----------------------------------------------------------------------------*\\\n"
22047"|* Bit Counting and Testing\n"
22048"\\*----------------------------------------------------------------------------*/\n"
22049"#if defined(__arm__) || defined(__aarch64__)\n"
22050"unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,\n"
22051" long _BitPos);\n"
22052"unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,\n"
22053" long _BitPos);\n"
22054"unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,\n"
22055" long _BitPos);\n"
22056"unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,\n"
22057" long _BitPos);\n"
22058"unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,\n"
22059" long _BitPos);\n"
22060"unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,\n"
22061" long _BitPos);\n"
22062"#endif\n"
22063"/*----------------------------------------------------------------------------*\\\n"
22064"|* Interlocked Or\n"
22065"\\*----------------------------------------------------------------------------*/\n"
22066"#if defined(__arm__) || defined(__aarch64__)\n"
22067"static __inline__ char __DEFAULT_FN_ATTRS\n"
22068"_InterlockedOr8_acq(char volatile *_Value, char _Mask) {\n"
22069" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22070"}\n"
22071"static __inline__ char __DEFAULT_FN_ATTRS\n"
22072"_InterlockedOr8_nf(char volatile *_Value, char _Mask) {\n"
22073" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n"
22074"}\n"
22075"static __inline__ char __DEFAULT_FN_ATTRS\n"
22076"_InterlockedOr8_rel(char volatile *_Value, char _Mask) {\n"
22077" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n"
22078"}\n"
22079"static __inline__ short __DEFAULT_FN_ATTRS\n"
22080"_InterlockedOr16_acq(short volatile *_Value, short _Mask) {\n"
22081" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22082"}\n"
22083"static __inline__ short __DEFAULT_FN_ATTRS\n"
22084"_InterlockedOr16_nf(short volatile *_Value, short _Mask) {\n"
22085" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n"
22086"}\n"
22087"static __inline__ short __DEFAULT_FN_ATTRS\n"
22088"_InterlockedOr16_rel(short volatile *_Value, short _Mask) {\n"
22089" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n"
22090"}\n"
22091"static __inline__ long __DEFAULT_FN_ATTRS\n"
22092"_InterlockedOr_acq(long volatile *_Value, long _Mask) {\n"
22093" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22094"}\n"
22095"static __inline__ long __DEFAULT_FN_ATTRS\n"
22096"_InterlockedOr_nf(long volatile *_Value, long _Mask) {\n"
22097" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n"
22098"}\n"
22099"static __inline__ long __DEFAULT_FN_ATTRS\n"
22100"_InterlockedOr_rel(long volatile *_Value, long _Mask) {\n"
22101" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n"
22102"}\n"
22103"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22104"_InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask) {\n"
22105" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22106"}\n"
22107"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22108"_InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask) {\n"
22109" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n"
22110"}\n"
22111"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22112"_InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask) {\n"
22113" return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n"
22114"}\n"
22115"#endif\n"
22116"/*----------------------------------------------------------------------------*\\\n"
22117"|* Interlocked Xor\n"
22118"\\*----------------------------------------------------------------------------*/\n"
22119"#if defined(__arm__) || defined(__aarch64__)\n"
22120"static __inline__ char __DEFAULT_FN_ATTRS\n"
22121"_InterlockedXor8_acq(char volatile *_Value, char _Mask) {\n"
22122" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22123"}\n"
22124"static __inline__ char __DEFAULT_FN_ATTRS\n"
22125"_InterlockedXor8_nf(char volatile *_Value, char _Mask) {\n"
22126" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n"
22127"}\n"
22128"static __inline__ char __DEFAULT_FN_ATTRS\n"
22129"_InterlockedXor8_rel(char volatile *_Value, char _Mask) {\n"
22130" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n"
22131"}\n"
22132"static __inline__ short __DEFAULT_FN_ATTRS\n"
22133"_InterlockedXor16_acq(short volatile *_Value, short _Mask) {\n"
22134" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22135"}\n"
22136"static __inline__ short __DEFAULT_FN_ATTRS\n"
22137"_InterlockedXor16_nf(short volatile *_Value, short _Mask) {\n"
22138" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n"
22139"}\n"
22140"static __inline__ short __DEFAULT_FN_ATTRS\n"
22141"_InterlockedXor16_rel(short volatile *_Value, short _Mask) {\n"
22142" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n"
22143"}\n"
22144"static __inline__ long __DEFAULT_FN_ATTRS\n"
22145"_InterlockedXor_acq(long volatile *_Value, long _Mask) {\n"
22146" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22147"}\n"
22148"static __inline__ long __DEFAULT_FN_ATTRS\n"
22149"_InterlockedXor_nf(long volatile *_Value, long _Mask) {\n"
22150" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n"
22151"}\n"
22152"static __inline__ long __DEFAULT_FN_ATTRS\n"
22153"_InterlockedXor_rel(long volatile *_Value, long _Mask) {\n"
22154" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n"
22155"}\n"
22156"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22157"_InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask) {\n"
22158" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n"
22159"}\n"
22160"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22161"_InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask) {\n"
22162" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n"
22163"}\n"
22164"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22165"_InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask) {\n"
22166" return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n"
22167"}\n"
22168"#endif\n"
22169"/*----------------------------------------------------------------------------*\\\n"
22170"|* Interlocked Exchange\n"
22171"\\*----------------------------------------------------------------------------*/\n"
22172"#if defined(__arm__) || defined(__aarch64__)\n"
22173"static __inline__ char __DEFAULT_FN_ATTRS\n"
22174"_InterlockedExchange8_acq(char volatile *_Target, char _Value) {\n"
22175" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n"
22176" return _Value;\n"
22177"}\n"
22178"static __inline__ char __DEFAULT_FN_ATTRS\n"
22179"_InterlockedExchange8_nf(char volatile *_Target, char _Value) {\n"
22180" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n"
22181" return _Value;\n"
22182"}\n"
22183"static __inline__ char __DEFAULT_FN_ATTRS\n"
22184"_InterlockedExchange8_rel(char volatile *_Target, char _Value) {\n"
22185" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n"
22186" return _Value;\n"
22187"}\n"
22188"static __inline__ short __DEFAULT_FN_ATTRS\n"
22189"_InterlockedExchange16_acq(short volatile *_Target, short _Value) {\n"
22190" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n"
22191" return _Value;\n"
22192"}\n"
22193"static __inline__ short __DEFAULT_FN_ATTRS\n"
22194"_InterlockedExchange16_nf(short volatile *_Target, short _Value) {\n"
22195" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n"
22196" return _Value;\n"
22197"}\n"
22198"static __inline__ short __DEFAULT_FN_ATTRS\n"
22199"_InterlockedExchange16_rel(short volatile *_Target, short _Value) {\n"
22200" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n"
22201" return _Value;\n"
22202"}\n"
22203"static __inline__ long __DEFAULT_FN_ATTRS\n"
22204"_InterlockedExchange_acq(long volatile *_Target, long _Value) {\n"
22205" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n"
22206" return _Value;\n"
22207"}\n"
22208"static __inline__ long __DEFAULT_FN_ATTRS\n"
22209"_InterlockedExchange_nf(long volatile *_Target, long _Value) {\n"
22210" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n"
22211" return _Value;\n"
22212"}\n"
22213"static __inline__ long __DEFAULT_FN_ATTRS\n"
22214"_InterlockedExchange_rel(long volatile *_Target, long _Value) {\n"
22215" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n"
22216" return _Value;\n"
22217"}\n"
22218"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22219"_InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value) {\n"
22220" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n"
22221" return _Value;\n"
22222"}\n"
22223"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22224"_InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value) {\n"
22225" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n"
22226" return _Value;\n"
22227"}\n"
22228"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22229"_InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value) {\n"
22230" __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n"
22231" return _Value;\n"
22232"}\n"
22233"#endif\n"
22234"/*----------------------------------------------------------------------------*\\\n"
22235"|* Interlocked Compare Exchange\n"
22236"\\*----------------------------------------------------------------------------*/\n"
22237"#if defined(__arm__) || defined(__aarch64__)\n"
22238"static __inline__ char __DEFAULT_FN_ATTRS\n"
22239"_InterlockedCompareExchange8_acq(char volatile *_Destination,\n"
22240" char _Exchange, char _Comparand) {\n"
22241" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22242" __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n"
22243" return _Comparand;\n"
22244"}\n"
22245"static __inline__ char __DEFAULT_FN_ATTRS\n"
22246"_InterlockedCompareExchange8_nf(char volatile *_Destination,\n"
22247" char _Exchange, char _Comparand) {\n"
22248" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22249" __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n"
22250" return _Comparand;\n"
22251"}\n"
22252"static __inline__ char __DEFAULT_FN_ATTRS\n"
22253"_InterlockedCompareExchange8_rel(char volatile *_Destination,\n"
22254" char _Exchange, char _Comparand) {\n"
22255" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22256" __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n"
22257" return _Comparand;\n"
22258"}\n"
22259"static __inline__ short __DEFAULT_FN_ATTRS\n"
22260"_InterlockedCompareExchange16_acq(short volatile *_Destination,\n"
22261" short _Exchange, short _Comparand) {\n"
22262" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22263" __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n"
22264" return _Comparand;\n"
22265"}\n"
22266"static __inline__ short __DEFAULT_FN_ATTRS\n"
22267"_InterlockedCompareExchange16_nf(short volatile *_Destination,\n"
22268" short _Exchange, short _Comparand) {\n"
22269" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22270" __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n"
22271" return _Comparand;\n"
22272"}\n"
22273"static __inline__ short __DEFAULT_FN_ATTRS\n"
22274"_InterlockedCompareExchange16_rel(short volatile *_Destination,\n"
22275" short _Exchange, short _Comparand) {\n"
22276" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22277" __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n"
22278" return _Comparand;\n"
22279"}\n"
22280"static __inline__ long __DEFAULT_FN_ATTRS\n"
22281"_InterlockedCompareExchange_acq(long volatile *_Destination,\n"
22282" long _Exchange, long _Comparand) {\n"
22283" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22284" __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n"
22285" return _Comparand;\n"
22286"}\n"
22287"static __inline__ long __DEFAULT_FN_ATTRS\n"
22288"_InterlockedCompareExchange_nf(long volatile *_Destination,\n"
22289" long _Exchange, long _Comparand) {\n"
22290" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22291" __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n"
22292" return _Comparand;\n"
22293"}\n"
22294"static __inline__ long __DEFAULT_FN_ATTRS\n"
22295"_InterlockedCompareExchange_rel(long volatile *_Destination,\n"
22296" long _Exchange, long _Comparand) {\n"
22297" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22298" __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n"
22299" return _Comparand;\n"
22300"}\n"
22301"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22302"_InterlockedCompareExchange64_acq(__int64 volatile *_Destination,\n"
22303" __int64 _Exchange, __int64 _Comparand) {\n"
22304" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22305" __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n"
22306" return _Comparand;\n"
22307"}\n"
22308"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22309"_InterlockedCompareExchange64_nf(__int64 volatile *_Destination,\n"
22310" __int64 _Exchange, __int64 _Comparand) {\n"
22311" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22312" __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n"
22313" return _Comparand;\n"
22314"}\n"
22315"static __inline__ __int64 __DEFAULT_FN_ATTRS\n"
22316"_InterlockedCompareExchange64_rel(__int64 volatile *_Destination,\n"
22317" __int64 _Exchange, __int64 _Comparand) {\n"
22318" __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n"
22319" __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n"
22320" return _Comparand;\n"
22321"}\n"
22322"#endif\n"
22323"\n"
22324"/*----------------------------------------------------------------------------*\\\n"
22325"|* movs, stos\n"
22326"\\*----------------------------------------------------------------------------*/\n"
22327"#if defined(__i386__) || defined(__x86_64__)\n"
22328"static __inline__ void __DEFAULT_FN_ATTRS\n"
22329"__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {\n"
22330" __asm__ __volatile__(\"rep movsb\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22331" : : \"memory\");\n"
22332"}\n"
22333"static __inline__ void __DEFAULT_FN_ATTRS\n"
22334"__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {\n"
22335" __asm__ __volatile__(\"rep movsl\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22336" : : \"memory\");\n"
22337"}\n"
22338"static __inline__ void __DEFAULT_FN_ATTRS\n"
22339"__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {\n"
22340" __asm__ __volatile__(\"rep movsw\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22341" : : \"memory\");\n"
22342"}\n"
22343"static __inline__ void __DEFAULT_FN_ATTRS\n"
22344"__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {\n"
22345" __asm__ __volatile__(\"rep stosl\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22346" : \"memory\");\n"
22347"}\n"
22348"static __inline__ void __DEFAULT_FN_ATTRS\n"
22349"__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {\n"
22350" __asm__ __volatile__(\"rep stosw\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22351" : \"memory\");\n"
22352"}\n"
22353"#endif\n"
22354"#ifdef __x86_64__\n"
22355"static __inline__ void __DEFAULT_FN_ATTRS\n"
22356"__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {\n"
22357" __asm__ __volatile__(\"rep movsq\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n"
22358" : : \"memory\");\n"
22359"}\n"
22360"static __inline__ void __DEFAULT_FN_ATTRS\n"
22361"__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {\n"
22362" __asm__ __volatile__(\"rep stosq\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n"
22363" : \"memory\");\n"
22364"}\n"
22365"#endif\n"
22366"\n"
22367"/*----------------------------------------------------------------------------*\\\n"
22368"|* Misc\n"
22369"\\*----------------------------------------------------------------------------*/\n"
22370"#if defined(__i386__) || defined(__x86_64__)\n"
22371"static __inline__ void __DEFAULT_FN_ATTRS\n"
22372"__cpuid(int __info[4], int __level) {\n"
22373" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22374" : \"a\"(__level));\n"
22375"}\n"
22376"static __inline__ void __DEFAULT_FN_ATTRS\n"
22377"__cpuidex(int __info[4], int __level, int __ecx) {\n"
22378" __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n"
22379" : \"a\"(__level), \"c\"(__ecx));\n"
22380"}\n"
22381"static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS\n"
22382"_xgetbv(unsigned int __xcr_no) {\n"
22383" unsigned int __eax, __edx;\n"
22384" __asm__ (\"xgetbv\" : \"=a\" (__eax), \"=d\" (__edx) : \"c\" (__xcr_no));\n"
22385" return ((unsigned __int64)__edx << 32) | __eax;\n"
22386"}\n"
22387"static __inline__ void __DEFAULT_FN_ATTRS\n"
22388"__halt(void) {\n"
22389" __asm__ volatile (\"hlt\");\n"
22390"}\n"
22391"static __inline__ void __DEFAULT_FN_ATTRS\n"
22392"__nop(void) {\n"
22393" __asm__ volatile (\"nop\");\n"
22394"}\n"
22395"#endif\n"
22396"\n"
22397"/*----------------------------------------------------------------------------*\\\n"
22398"|* Privileged intrinsics\n"
22399"\\*----------------------------------------------------------------------------*/\n"
22400"#if defined(__i386__) || defined(__x86_64__)\n"
22401"static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS\n"
22402"__readmsr(unsigned long __register) {\n"
22403" // Loads the contents of a 64-bit model specific register (MSR) specified in\n"
22404" // the ECX register into registers EDX:EAX. The EDX register is loaded with\n"
22405" // the high-order 32 bits of the MSR and the EAX register is loaded with the\n"
22406" // low-order 32 bits. If less than 64 bits are implemented in the MSR being\n"
22407" // read, the values returned to EDX:EAX in unimplemented bit locations are\n"
22408" // undefined.\n"
22409" unsigned long __edx;\n"
22410" unsigned long __eax;\n"
22411" __asm__ (\"rdmsr\" : \"=d\"(__edx), \"=a\"(__eax) : \"c\"(__register));\n"
22412" return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;\n"
22413"}\n"
22414"\n"
22415"static __inline__ unsigned long __DEFAULT_FN_ATTRS\n"
22416"__readcr3(void) {\n"
22417" unsigned long __cr3_val;\n"
22418" __asm__ __volatile__ (\"mov %%cr3, %0\" : \"=q\"(__cr3_val) : : \"memory\");\n"
22419" return __cr3_val;\n"
22420"}\n"
22421"\n"
22422"static __inline__ void __DEFAULT_FN_ATTRS\n"
22423"__writecr3(unsigned int __cr3_val) {\n"
22424" __asm__ (\"mov %0, %%cr3\" : : \"q\"(__cr3_val) : \"memory\");\n"
22425"}\n"
22426"#endif\n"
22427"\n"
22428"#ifdef __cplusplus\n"
22429"}\n"
22430"#endif\n"
22431"\n"
22432"#undef __DEFAULT_FN_ATTRS\n"
22433"\n"
22434"#endif /* __INTRIN_H */\n"
22435"#endif /* _MSC_VER */\n"
22436"" } ,
22437 { "/builtins/inttypes.h" , "/*===---- inttypes.h - Standard header for integer printf macros ----------===*\\\n"
22438" *\n"
22439" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22440" * of this software and associated documentation files (the \"Software\"), to deal\n"
22441" * in the Software without restriction, including without limitation the rights\n"
22442" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22443" * copies of the Software, and to permit persons to whom the Software is\n"
22444" * furnished to do so, subject to the following conditions:\n"
22445" *\n"
22446" * The above copyright notice and this permission notice shall be included in\n"
22447" * all copies or substantial portions of the Software.\n"
22448" *\n"
22449" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22450" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22451" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22452" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22453" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22454" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22455" * THE SOFTWARE.\n"
22456" *\n"
22457"\\*===----------------------------------------------------------------------===*/\n"
22458"\n"
22459"#ifndef __CLANG_INTTYPES_H\n"
22460"#define __CLANG_INTTYPES_H\n"
22461"\n"
22462"#if defined(_MSC_VER) && _MSC_VER < 1800\n"
22463"#error MSVC does not have inttypes.h prior to Visual Studio 2013\n"
22464"#endif\n"
22465"\n"
22466"#include_next <inttypes.h>\n"
22467"\n"
22468"#if defined(_MSC_VER) && _MSC_VER < 1900\n"
22469"/* MSVC headers define int32_t as int, but PRIx32 as \"lx\" instead of \"x\".\n"
22470" * This triggers format warnings, so fix it up here. */\n"
22471"#undef PRId32\n"
22472"#undef PRIdLEAST32\n"
22473"#undef PRIdFAST32\n"
22474"#undef PRIi32\n"
22475"#undef PRIiLEAST32\n"
22476"#undef PRIiFAST32\n"
22477"#undef PRIo32\n"
22478"#undef PRIoLEAST32\n"
22479"#undef PRIoFAST32\n"
22480"#undef PRIu32\n"
22481"#undef PRIuLEAST32\n"
22482"#undef PRIuFAST32\n"
22483"#undef PRIx32\n"
22484"#undef PRIxLEAST32\n"
22485"#undef PRIxFAST32\n"
22486"#undef PRIX32\n"
22487"#undef PRIXLEAST32\n"
22488"#undef PRIXFAST32\n"
22489"\n"
22490"#undef SCNd32\n"
22491"#undef SCNdLEAST32\n"
22492"#undef SCNdFAST32\n"
22493"#undef SCNi32\n"
22494"#undef SCNiLEAST32\n"
22495"#undef SCNiFAST32\n"
22496"#undef SCNo32\n"
22497"#undef SCNoLEAST32\n"
22498"#undef SCNoFAST32\n"
22499"#undef SCNu32\n"
22500"#undef SCNuLEAST32\n"
22501"#undef SCNuFAST32\n"
22502"#undef SCNx32\n"
22503"#undef SCNxLEAST32\n"
22504"#undef SCNxFAST32\n"
22505"\n"
22506"#define PRId32 \"d\"\n"
22507"#define PRIdLEAST32 \"d\"\n"
22508"#define PRIdFAST32 \"d\"\n"
22509"#define PRIi32 \"i\"\n"
22510"#define PRIiLEAST32 \"i\"\n"
22511"#define PRIiFAST32 \"i\"\n"
22512"#define PRIo32 \"o\"\n"
22513"#define PRIoLEAST32 \"o\"\n"
22514"#define PRIoFAST32 \"o\"\n"
22515"#define PRIu32 \"u\"\n"
22516"#define PRIuLEAST32 \"u\"\n"
22517"#define PRIuFAST32 \"u\"\n"
22518"#define PRIx32 \"x\"\n"
22519"#define PRIxLEAST32 \"x\"\n"
22520"#define PRIxFAST32 \"x\"\n"
22521"#define PRIX32 \"X\"\n"
22522"#define PRIXLEAST32 \"X\"\n"
22523"#define PRIXFAST32 \"X\"\n"
22524"\n"
22525"#define SCNd32 \"d\"\n"
22526"#define SCNdLEAST32 \"d\"\n"
22527"#define SCNdFAST32 \"d\"\n"
22528"#define SCNi32 \"i\"\n"
22529"#define SCNiLEAST32 \"i\"\n"
22530"#define SCNiFAST32 \"i\"\n"
22531"#define SCNo32 \"o\"\n"
22532"#define SCNoLEAST32 \"o\"\n"
22533"#define SCNoFAST32 \"o\"\n"
22534"#define SCNu32 \"u\"\n"
22535"#define SCNuLEAST32 \"u\"\n"
22536"#define SCNuFAST32 \"u\"\n"
22537"#define SCNx32 \"x\"\n"
22538"#define SCNxLEAST32 \"x\"\n"
22539"#define SCNxFAST32 \"x\"\n"
22540"#endif\n"
22541"\n"
22542"#endif /* __CLANG_INTTYPES_H */\n"
22543"" } ,
22544 { "/builtins/invpcidintrin.h" , "/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===\n"
22545" *\n"
22546" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22547" * of this software and associated documentation files (the \"Software\"), to deal\n"
22548" * in the Software without restriction, including without limitation the rights\n"
22549" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22550" * copies of the Software, and to permit persons to whom the Software is\n"
22551" * furnished to do so, subject to the following conditions:\n"
22552" *\n"
22553" * The above copyright notice and this permission notice shall be included in\n"
22554" * all copies or substantial portions of the Software.\n"
22555" *\n"
22556" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22557" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22558" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22559" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22560" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22561" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22562" * THE SOFTWARE.\n"
22563" *\n"
22564" *===-----------------------------------------------------------------------===\n"
22565" */\n"
22566"\n"
22567"#ifndef __IMMINTRIN_H\n"
22568"#error \"Never use <invpcidintrin.h> directly; include <immintrin.h> instead.\"\n"
22569"#endif\n"
22570"\n"
22571"#ifndef __INVPCIDINTRIN_H\n"
22572"#define __INVPCIDINTRIN_H\n"
22573"\n"
22574"static __inline__ void\n"
22575" __attribute__((__always_inline__, __nodebug__, __target__(\"invpcid\")))\n"
22576"_invpcid(unsigned int __type, void *__descriptor) {\n"
22577" __builtin_ia32_invpcid(__type, __descriptor);\n"
22578"}\n"
22579"\n"
22580"#endif /* __INVPCIDINTRIN_H */\n"
22581"" } ,
22582 { "/builtins/iso646.h" , "/*===---- iso646.h - Standard header for alternate spellings of operators---===\n"
22583" *\n"
22584" * Copyright (c) 2008 Eli Friedman\n"
22585" *\n"
22586" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22587" * of this software and associated documentation files (the \"Software\"), to deal\n"
22588" * in the Software without restriction, including without limitation the rights\n"
22589" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22590" * copies of the Software, and to permit persons to whom the Software is\n"
22591" * furnished to do so, subject to the following conditions:\n"
22592" *\n"
22593" * The above copyright notice and this permission notice shall be included in\n"
22594" * all copies or substantial portions of the Software.\n"
22595" *\n"
22596" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22597" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22598" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22599" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22600" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22601" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22602" * THE SOFTWARE.\n"
22603" *\n"
22604" *===-----------------------------------------------------------------------===\n"
22605" */\n"
22606"\n"
22607"#ifndef __ISO646_H\n"
22608"#define __ISO646_H\n"
22609"\n"
22610"#ifndef __cplusplus\n"
22611"#define and &&\n"
22612"#define and_eq &=\n"
22613"#define bitand &\n"
22614"#define bitor |\n"
22615"#define compl ~\n"
22616"#define not !\n"
22617"#define not_eq !=\n"
22618"#define or ||\n"
22619"#define or_eq |=\n"
22620"#define xor ^\n"
22621"#define xor_eq ^=\n"
22622"#endif\n"
22623"\n"
22624"#endif /* __ISO646_H */\n"
22625"" } ,
22626 { "/builtins/limits.h" , "/*===---- limits.h - Standard header for integer sizes --------------------===*\\\n"
22627" *\n"
22628" * Copyright (c) 2009 Chris Lattner\n"
22629" *\n"
22630" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22631" * of this software and associated documentation files (the \"Software\"), to deal\n"
22632" * in the Software without restriction, including without limitation the rights\n"
22633" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22634" * copies of the Software, and to permit persons to whom the Software is\n"
22635" * furnished to do so, subject to the following conditions:\n"
22636" *\n"
22637" * The above copyright notice and this permission notice shall be included in\n"
22638" * all copies or substantial portions of the Software.\n"
22639" *\n"
22640" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22641" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22642" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22643" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22644" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22645" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22646" * THE SOFTWARE.\n"
22647" *\n"
22648"\\*===----------------------------------------------------------------------===*/\n"
22649"\n"
22650"#ifndef __CLANG_LIMITS_H\n"
22651"#define __CLANG_LIMITS_H\n"
22652"\n"
22653"/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.\n"
22654" Avert this #include_next madness. */\n"
22655"#if defined __GNUC__ && !defined _GCC_LIMITS_H_\n"
22656"#define _GCC_LIMITS_H_\n"
22657"#endif\n"
22658"\n"
22659"/* System headers include a number of constants from POSIX in <limits.h>.\n"
22660" Include it if we're hosted. */\n"
22661"#if __STDC_HOSTED__ && __has_include_next(<limits.h>)\n"
22662"#include_next <limits.h>\n"
22663"#endif\n"
22664"\n"
22665"/* Many system headers try to \"help us out\" by defining these. No really, we\n"
22666" know how big each datatype is. */\n"
22667"#undef SCHAR_MIN\n"
22668"#undef SCHAR_MAX\n"
22669"#undef UCHAR_MAX\n"
22670"#undef SHRT_MIN\n"
22671"#undef SHRT_MAX\n"
22672"#undef USHRT_MAX\n"
22673"#undef INT_MIN\n"
22674"#undef INT_MAX\n"
22675"#undef UINT_MAX\n"
22676"#undef LONG_MIN\n"
22677"#undef LONG_MAX\n"
22678"#undef ULONG_MAX\n"
22679"\n"
22680"#undef CHAR_BIT\n"
22681"#undef CHAR_MIN\n"
22682"#undef CHAR_MAX\n"
22683"\n"
22684"/* C90/99 5.2.4.2.1 */\n"
22685"#define SCHAR_MAX __SCHAR_MAX__\n"
22686"#define SHRT_MAX __SHRT_MAX__\n"
22687"#define INT_MAX __INT_MAX__\n"
22688"#define LONG_MAX __LONG_MAX__\n"
22689"\n"
22690"#define SCHAR_MIN (-__SCHAR_MAX__-1)\n"
22691"#define SHRT_MIN (-__SHRT_MAX__ -1)\n"
22692"#define INT_MIN (-__INT_MAX__ -1)\n"
22693"#define LONG_MIN (-__LONG_MAX__ -1L)\n"
22694"\n"
22695"#define UCHAR_MAX (__SCHAR_MAX__*2 +1)\n"
22696"#define USHRT_MAX (__SHRT_MAX__ *2 +1)\n"
22697"#define UINT_MAX (__INT_MAX__ *2U +1U)\n"
22698"#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)\n"
22699"\n"
22700"#ifndef MB_LEN_MAX\n"
22701"#define MB_LEN_MAX 1\n"
22702"#endif\n"
22703"\n"
22704"#define CHAR_BIT __CHAR_BIT__\n"
22705"\n"
22706"#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */\n"
22707"#define CHAR_MIN 0\n"
22708"#define CHAR_MAX UCHAR_MAX\n"
22709"#else\n"
22710"#define CHAR_MIN SCHAR_MIN\n"
22711"#define CHAR_MAX __SCHAR_MAX__\n"
22712"#endif\n"
22713"\n"
22714"/* C99 5.2.4.2.1: Added long long.\n"
22715" C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.\n"
22716" */\n"
22717"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L\n"
22718"\n"
22719"#undef LLONG_MIN\n"
22720"#undef LLONG_MAX\n"
22721"#undef ULLONG_MAX\n"
22722"\n"
22723"#define LLONG_MAX __LONG_LONG_MAX__\n"
22724"#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22725"#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22726"#endif\n"
22727"\n"
22728"/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad\n"
22729" that we don't have something like #pragma poison that could be used to\n"
22730" deprecate a macro - the code should just use LLONG_MAX and friends.\n"
22731" */\n"
22732"#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)\n"
22733"\n"
22734"#undef LONG_LONG_MIN\n"
22735"#undef LONG_LONG_MAX\n"
22736"#undef ULONG_LONG_MAX\n"
22737"\n"
22738"#define LONG_LONG_MAX __LONG_LONG_MAX__\n"
22739"#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)\n"
22740"#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n"
22741"#endif\n"
22742"\n"
22743"#endif /* __CLANG_LIMITS_H */\n"
22744"" } ,
22745 { "/builtins/lwpintrin.h" , "/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===\n"
22746" *\n"
22747" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22748" * of this software and associated documentation files (the \"Software\"), to deal\n"
22749" * in the Software without restriction, including without limitation the rights\n"
22750" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22751" * copies of the Software, and to permit persons to whom the Software is\n"
22752" * furnished to do so, subject to the following conditions:\n"
22753" *\n"
22754" * The above copyright notice and this permission notice shall be included in\n"
22755" * all copies or substantial portions of the Software.\n"
22756" *\n"
22757" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22758" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22759" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22760" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22761" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22762" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22763" * THE SOFTWARE.\n"
22764" *\n"
22765" *===-----------------------------------------------------------------------===\n"
22766" */\n"
22767"\n"
22768"#ifndef __X86INTRIN_H\n"
22769"#error \"Never use <lwpintrin.h> directly; include <x86intrin.h> instead.\"\n"
22770"#endif\n"
22771"\n"
22772"#ifndef __LWPINTRIN_H\n"
22773"#define __LWPINTRIN_H\n"
22774"\n"
22775"/* Define the default attributes for the functions in this file. */\n"
22776"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lwp\")))\n"
22777"\n"
22778"/// Parses the LWPCB at the specified address and enables\n"
22779"/// profiling if valid.\n"
22780"///\n"
22781"/// \\headerfile <x86intrin.h>\n"
22782"///\n"
22783"/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.\n"
22784"///\n"
22785"/// \\param __addr\n"
22786"/// Address to the new Lightweight Profiling Control Block (LWPCB). If the\n"
22787"/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables\n"
22788"/// Lightweight Profiling.\n"
22789"static __inline__ void __DEFAULT_FN_ATTRS\n"
22790"__llwpcb (void *__addr)\n"
22791"{\n"
22792" __builtin_ia32_llwpcb(__addr);\n"
22793"}\n"
22794"\n"
22795"/// Flushes the LWP state to memory and returns the address of the LWPCB.\n"
22796"///\n"
22797"/// \\headerfile <x86intrin.h>\n"
22798"///\n"
22799"/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.\n"
22800"///\n"
22801"/// \\return\n"
22802"/// Address to the current Lightweight Profiling Control Block (LWPCB).\n"
22803"/// If LWP is not currently enabled, returns NULL.\n"
22804"static __inline__ void* __DEFAULT_FN_ATTRS\n"
22805"__slwpcb (void)\n"
22806"{\n"
22807" return __builtin_ia32_slwpcb();\n"
22808"}\n"
22809"\n"
22810"/// Inserts programmed event record into the LWP event ring buffer\n"
22811"/// and advances the ring buffer pointer.\n"
22812"///\n"
22813"/// \\headerfile <x86intrin.h>\n"
22814"///\n"
22815"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22816"///\n"
22817"/// \\param DATA2\n"
22818"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22819"/// \\param DATA1\n"
22820"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22821"/// \\param FLAGS\n"
22822"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22823"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22824"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22825"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22826"/// 1 is returned. Otherwise 0 is returned.\n"
22827"#define __lwpins32(DATA2, DATA1, FLAGS) \\\n"
22828" (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22829" (unsigned int) (FLAGS)))\n"
22830"\n"
22831"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22832"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22833"/// and advances the ring buffer pointer.\n"
22834"///\n"
22835"/// \\headerfile <x86intrin.h>\n"
22836"///\n"
22837"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22838"///\n"
22839"/// \\param DATA2\n"
22840"/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n"
22841"/// \\param DATA1\n"
22842"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22843"/// \\param FLAGS\n"
22844"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22845"#define __lwpval32(DATA2, DATA1, FLAGS) \\\n"
22846" (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n"
22847" (unsigned int) (FLAGS)))\n"
22848"\n"
22849"#ifdef __x86_64__\n"
22850"\n"
22851"/// Inserts programmed event record into the LWP event ring buffer\n"
22852"/// and advances the ring buffer pointer.\n"
22853"///\n"
22854"/// \\headerfile <x86intrin.h>\n"
22855"///\n"
22856"/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n"
22857"///\n"
22858"/// \\param DATA2\n"
22859"/// A 64-bit value is inserted into the 64-bit Data2 field.\n"
22860"/// \\param DATA1\n"
22861"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22862"/// \\param FLAGS\n"
22863"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22864"/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n"
22865"/// the event record overwrites the last record in the buffer, the MissedEvents\n"
22866"/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n"
22867"/// 1 is returned. Otherwise 0 is returned.\n"
22868"#define __lwpins64(DATA2, DATA1, FLAGS) \\\n"
22869" (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22870" (unsigned int) (FLAGS)))\n"
22871"\n"
22872"/// Decrements the LWP programmed value sample event counter. If the result is\n"
22873"/// negative, inserts an event record into the LWP event ring buffer in memory\n"
22874"/// and advances the ring buffer pointer.\n"
22875"///\n"
22876"/// \\headerfile <x86intrin.h>\n"
22877"///\n"
22878"/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n"
22879"///\n"
22880"/// \\param DATA2\n"
22881"/// A 64-bit value is and inserted into the 64-bit Data2 field.\n"
22882"/// \\param DATA1\n"
22883"/// A 32-bit value is inserted into the 32-bit Data1 field.\n"
22884"/// \\param FLAGS\n"
22885"/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n"
22886"#define __lwpval64(DATA2, DATA1, FLAGS) \\\n"
22887" (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n"
22888" (unsigned int) (FLAGS)))\n"
22889"\n"
22890"#endif\n"
22891"\n"
22892"#undef __DEFAULT_FN_ATTRS\n"
22893"\n"
22894"#endif /* __LWPINTRIN_H */\n"
22895"" } ,
22896 { "/builtins/lzcntintrin.h" , "/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===\n"
22897" *\n"
22898" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
22899" * of this software and associated documentation files (the \"Software\"), to deal\n"
22900" * in the Software without restriction, including without limitation the rights\n"
22901" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
22902" * copies of the Software, and to permit persons to whom the Software is\n"
22903" * furnished to do so, subject to the following conditions:\n"
22904" *\n"
22905" * The above copyright notice and this permission notice shall be included in\n"
22906" * all copies or substantial portions of the Software.\n"
22907" *\n"
22908" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
22909" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
22910" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
22911" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
22912" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
22913" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
22914" * THE SOFTWARE.\n"
22915" *\n"
22916" *===-----------------------------------------------------------------------===\n"
22917" */\n"
22918"\n"
22919"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
22920"#error \"Never use <lzcntintrin.h> directly; include <x86intrin.h> instead.\"\n"
22921"#endif\n"
22922"\n"
22923"#ifndef __LZCNTINTRIN_H\n"
22924"#define __LZCNTINTRIN_H\n"
22925"\n"
22926"/* Define the default attributes for the functions in this file. */\n"
22927"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lzcnt\")))\n"
22928"\n"
22929"/// Counts the number of leading zero bits in the operand.\n"
22930"///\n"
22931"/// \\headerfile <x86intrin.h>\n"
22932"///\n"
22933"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22934"///\n"
22935"/// \\param __X\n"
22936"/// An unsigned 16-bit integer whose leading zeros are to be counted.\n"
22937"/// \\returns An unsigned 16-bit integer containing the number of leading zero\n"
22938"/// bits in the operand.\n"
22939"static __inline__ unsigned short __DEFAULT_FN_ATTRS\n"
22940"__lzcnt16(unsigned short __X)\n"
22941"{\n"
22942" return __X ? __builtin_clzs(__X) : 16;\n"
22943"}\n"
22944"\n"
22945"/// Counts the number of leading zero bits in the operand.\n"
22946"///\n"
22947"/// \\headerfile <x86intrin.h>\n"
22948"///\n"
22949"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22950"///\n"
22951"/// \\param __X\n"
22952"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22953"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22954"/// bits in the operand.\n"
22955"/// \\see _lzcnt_u32\n"
22956"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22957"__lzcnt32(unsigned int __X)\n"
22958"{\n"
22959" return __X ? __builtin_clz(__X) : 32;\n"
22960"}\n"
22961"\n"
22962"/// Counts the number of leading zero bits in the operand.\n"
22963"///\n"
22964"/// \\headerfile <x86intrin.h>\n"
22965"///\n"
22966"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22967"///\n"
22968"/// \\param __X\n"
22969"/// An unsigned 32-bit integer whose leading zeros are to be counted.\n"
22970"/// \\returns An unsigned 32-bit integer containing the number of leading zero\n"
22971"/// bits in the operand.\n"
22972"/// \\see __lzcnt32\n"
22973"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
22974"_lzcnt_u32(unsigned int __X)\n"
22975"{\n"
22976" return __X ? __builtin_clz(__X) : 32;\n"
22977"}\n"
22978"\n"
22979"#ifdef __x86_64__\n"
22980"/// Counts the number of leading zero bits in the operand.\n"
22981"///\n"
22982"/// \\headerfile <x86intrin.h>\n"
22983"///\n"
22984"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
22985"///\n"
22986"/// \\param __X\n"
22987"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
22988"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
22989"/// bits in the operand.\n"
22990"/// \\see _lzcnt_u64\n"
22991"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
22992"__lzcnt64(unsigned long long __X)\n"
22993"{\n"
22994" return __X ? __builtin_clzll(__X) : 64;\n"
22995"}\n"
22996"\n"
22997"/// Counts the number of leading zero bits in the operand.\n"
22998"///\n"
22999"/// \\headerfile <x86intrin.h>\n"
23000"///\n"
23001"/// This intrinsic corresponds to the \\c LZCNT instruction.\n"
23002"///\n"
23003"/// \\param __X\n"
23004"/// An unsigned 64-bit integer whose leading zeros are to be counted.\n"
23005"/// \\returns An unsigned 64-bit integer containing the number of leading zero\n"
23006"/// bits in the operand.\n"
23007"/// \\see __lzcnt64\n"
23008"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
23009"_lzcnt_u64(unsigned long long __X)\n"
23010"{\n"
23011" return __X ? __builtin_clzll(__X) : 64;\n"
23012"}\n"
23013"#endif\n"
23014"\n"
23015"#undef __DEFAULT_FN_ATTRS\n"
23016"\n"
23017"#endif /* __LZCNTINTRIN_H */\n"
23018"" } ,
23019 { "/builtins/mm3dnow.h" , "/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===\n"
23020" *\n"
23021" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23022" * of this software and associated documentation files (the \"Software\"), to deal\n"
23023" * in the Software without restriction, including without limitation the rights\n"
23024" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23025" * copies of the Software, and to permit persons to whom the Software is\n"
23026" * furnished to do so, subject to the following conditions:\n"
23027" *\n"
23028" * The above copyright notice and this permission notice shall be included in\n"
23029" * all copies or substantial portions of the Software.\n"
23030" *\n"
23031" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23032" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23033" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23034" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23035" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23036" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23037" * THE SOFTWARE.\n"
23038" *\n"
23039" *===-----------------------------------------------------------------------===\n"
23040" */\n"
23041"\n"
23042"#ifndef _MM3DNOW_H_INCLUDED\n"
23043"#define _MM3DNOW_H_INCLUDED\n"
23044"\n"
23045"#include <mmintrin.h>\n"
23046"#include <prfchwintrin.h>\n"
23047"\n"
23048"typedef float __v2sf __attribute__((__vector_size__(8)));\n"
23049"\n"
23050"/* Define the default attributes for the functions in this file. */\n"
23051"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\"), __min_vector_width__(64)))\n"
23052"\n"
23053"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\")))\n"
23054"_m_femms(void) {\n"
23055" __builtin_ia32_femms();\n"
23056"}\n"
23057"\n"
23058"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23059"_m_pavgusb(__m64 __m1, __m64 __m2) {\n"
23060" return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);\n"
23061"}\n"
23062"\n"
23063"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23064"_m_pf2id(__m64 __m) {\n"
23065" return (__m64)__builtin_ia32_pf2id((__v2sf)__m);\n"
23066"}\n"
23067"\n"
23068"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23069"_m_pfacc(__m64 __m1, __m64 __m2) {\n"
23070" return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);\n"
23071"}\n"
23072"\n"
23073"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23074"_m_pfadd(__m64 __m1, __m64 __m2) {\n"
23075" return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);\n"
23076"}\n"
23077"\n"
23078"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23079"_m_pfcmpeq(__m64 __m1, __m64 __m2) {\n"
23080" return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);\n"
23081"}\n"
23082"\n"
23083"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23084"_m_pfcmpge(__m64 __m1, __m64 __m2) {\n"
23085" return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);\n"
23086"}\n"
23087"\n"
23088"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23089"_m_pfcmpgt(__m64 __m1, __m64 __m2) {\n"
23090" return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);\n"
23091"}\n"
23092"\n"
23093"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23094"_m_pfmax(__m64 __m1, __m64 __m2) {\n"
23095" return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);\n"
23096"}\n"
23097"\n"
23098"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23099"_m_pfmin(__m64 __m1, __m64 __m2) {\n"
23100" return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);\n"
23101"}\n"
23102"\n"
23103"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23104"_m_pfmul(__m64 __m1, __m64 __m2) {\n"
23105" return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);\n"
23106"}\n"
23107"\n"
23108"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23109"_m_pfrcp(__m64 __m) {\n"
23110" return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);\n"
23111"}\n"
23112"\n"
23113"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23114"_m_pfrcpit1(__m64 __m1, __m64 __m2) {\n"
23115" return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);\n"
23116"}\n"
23117"\n"
23118"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23119"_m_pfrcpit2(__m64 __m1, __m64 __m2) {\n"
23120" return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);\n"
23121"}\n"
23122"\n"
23123"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23124"_m_pfrsqrt(__m64 __m) {\n"
23125" return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);\n"
23126"}\n"
23127"\n"
23128"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23129"_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {\n"
23130" return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);\n"
23131"}\n"
23132"\n"
23133"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23134"_m_pfsub(__m64 __m1, __m64 __m2) {\n"
23135" return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);\n"
23136"}\n"
23137"\n"
23138"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23139"_m_pfsubr(__m64 __m1, __m64 __m2) {\n"
23140" return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);\n"
23141"}\n"
23142"\n"
23143"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23144"_m_pi2fd(__m64 __m) {\n"
23145" return (__m64)__builtin_ia32_pi2fd((__v2si)__m);\n"
23146"}\n"
23147"\n"
23148"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23149"_m_pmulhrw(__m64 __m1, __m64 __m2) {\n"
23150" return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);\n"
23151"}\n"
23152"\n"
23153"/* Handle the 3dnowa instructions here. */\n"
23154"#undef __DEFAULT_FN_ATTRS\n"
23155"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnowa\"), __min_vector_width__(64)))\n"
23156"\n"
23157"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23158"_m_pf2iw(__m64 __m) {\n"
23159" return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);\n"
23160"}\n"
23161"\n"
23162"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23163"_m_pfnacc(__m64 __m1, __m64 __m2) {\n"
23164" return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23165"}\n"
23166"\n"
23167"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23168"_m_pfpnacc(__m64 __m1, __m64 __m2) {\n"
23169" return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);\n"
23170"}\n"
23171"\n"
23172"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23173"_m_pi2fw(__m64 __m) {\n"
23174" return (__m64)__builtin_ia32_pi2fw((__v2si)__m);\n"
23175"}\n"
23176"\n"
23177"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23178"_m_pswapdsf(__m64 __m) {\n"
23179" return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);\n"
23180"}\n"
23181"\n"
23182"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23183"_m_pswapdsi(__m64 __m) {\n"
23184" return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);\n"
23185"}\n"
23186"\n"
23187"#undef __DEFAULT_FN_ATTRS\n"
23188"\n"
23189"#endif\n"
23190"" } ,
23191 { "/builtins/mm_malloc.h" , "/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===\n"
23192" *\n"
23193" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23194" * of this software and associated documentation files (the \"Software\"), to deal\n"
23195" * in the Software without restriction, including without limitation the rights\n"
23196" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23197" * copies of the Software, and to permit persons to whom the Software is\n"
23198" * furnished to do so, subject to the following conditions:\n"
23199" *\n"
23200" * The above copyright notice and this permission notice shall be included in\n"
23201" * all copies or substantial portions of the Software.\n"
23202" *\n"
23203" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23204" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23205" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23206" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23207" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23208" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23209" * THE SOFTWARE.\n"
23210" *\n"
23211" *===-----------------------------------------------------------------------===\n"
23212" */\n"
23213"\n"
23214"#ifndef __MM_MALLOC_H\n"
23215"#define __MM_MALLOC_H\n"
23216"\n"
23217"#include <stdlib.h>\n"
23218"\n"
23219"#ifdef _WIN32\n"
23220"#include <malloc.h>\n"
23221"#else\n"
23222"#ifndef __cplusplus\n"
23223"extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23224"#else\n"
23225"// Some systems (e.g. those with GNU libc) declare posix_memalign with an\n"
23226"// exception specifier. Via an \"egregious workaround\" in\n"
23227"// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid\n"
23228"// redeclaration of glibc's declaration.\n"
23229"extern \"C\" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n"
23230"#endif\n"
23231"#endif\n"
23232"\n"
23233"#if !(defined(_WIN32) && defined(_mm_malloc))\n"
23234"static __inline__ void *__attribute__((__always_inline__, __nodebug__,\n"
23235" __malloc__))\n"
23236"_mm_malloc(size_t __size, size_t __align)\n"
23237"{\n"
23238" if (__align == 1) {\n"
23239" return malloc(__size);\n"
23240" }\n"
23241"\n"
23242" if (!(__align & (__align - 1)) && __align < sizeof(void *))\n"
23243" __align = sizeof(void *);\n"
23244"\n"
23245" void *__mallocedMemory;\n"
23246"#if defined(__MINGW32__)\n"
23247" __mallocedMemory = __mingw_aligned_malloc(__size, __align);\n"
23248"#elif defined(_WIN32)\n"
23249" __mallocedMemory = _aligned_malloc(__size, __align);\n"
23250"#else\n"
23251" if (posix_memalign(&__mallocedMemory, __align, __size))\n"
23252" return 0;\n"
23253"#endif\n"
23254"\n"
23255" return __mallocedMemory;\n"
23256"}\n"
23257"\n"
23258"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
23259"_mm_free(void *__p)\n"
23260"{\n"
23261" free(__p);\n"
23262"}\n"
23263"#endif\n"
23264"\n"
23265"#endif /* __MM_MALLOC_H */\n"
23266"" } ,
23267 { "/builtins/mmintrin.h" , "/*===---- mmintrin.h - MMX intrinsics --------------------------------------===\n"
23268" *\n"
23269" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
23270" * of this software and associated documentation files (the \"Software\"), to deal\n"
23271" * in the Software without restriction, including without limitation the rights\n"
23272" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
23273" * copies of the Software, and to permit persons to whom the Software is\n"
23274" * furnished to do so, subject to the following conditions:\n"
23275" *\n"
23276" * The above copyright notice and this permission notice shall be included in\n"
23277" * all copies or substantial portions of the Software.\n"
23278" *\n"
23279" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
23280" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
23281" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
23282" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
23283" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
23284" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
23285" * THE SOFTWARE.\n"
23286" *\n"
23287" *===-----------------------------------------------------------------------===\n"
23288" */\n"
23289"\n"
23290"#ifndef __MMINTRIN_H\n"
23291"#define __MMINTRIN_H\n"
23292"\n"
23293"typedef long long __m64 __attribute__((__vector_size__(8)));\n"
23294"\n"
23295"typedef long long __v1di __attribute__((__vector_size__(8)));\n"
23296"typedef int __v2si __attribute__((__vector_size__(8)));\n"
23297"typedef short __v4hi __attribute__((__vector_size__(8)));\n"
23298"typedef char __v8qi __attribute__((__vector_size__(8)));\n"
23299"\n"
23300"/* Define the default attributes for the functions in this file. */\n"
23301"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\"), __min_vector_width__(64)))\n"
23302"\n"
23303"/// Clears the MMX state by setting the state of the x87 stack registers\n"
23304"/// to empty.\n"
23305"///\n"
23306"/// \\headerfile <x86intrin.h>\n"
23307"///\n"
23308"/// This intrinsic corresponds to the <c> EMMS </c> instruction.\n"
23309"///\n"
23310"static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\")))\n"
23311"_mm_empty(void)\n"
23312"{\n"
23313" __builtin_ia32_emms();\n"
23314"}\n"
23315"\n"
23316"/// Constructs a 64-bit integer vector, setting the lower 32 bits to the\n"
23317"/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.\n"
23318"///\n"
23319"/// \\headerfile <x86intrin.h>\n"
23320"///\n"
23321"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23322"///\n"
23323"/// \\param __i\n"
23324"/// A 32-bit integer value.\n"
23325"/// \\returns A 64-bit integer vector. The lower 32 bits contain the value of the\n"
23326"/// parameter. The upper 32 bits are set to 0.\n"
23327"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23328"_mm_cvtsi32_si64(int __i)\n"
23329"{\n"
23330" return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);\n"
23331"}\n"
23332"\n"
23333"/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit\n"
23334"/// signed integer.\n"
23335"///\n"
23336"/// \\headerfile <x86intrin.h>\n"
23337"///\n"
23338"/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n"
23339"///\n"
23340"/// \\param __m\n"
23341"/// A 64-bit integer vector.\n"
23342"/// \\returns A 32-bit signed integer value containing the lower 32 bits of the\n"
23343"/// parameter.\n"
23344"static __inline__ int __DEFAULT_FN_ATTRS\n"
23345"_mm_cvtsi64_si32(__m64 __m)\n"
23346"{\n"
23347" return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);\n"
23348"}\n"
23349"\n"
23350"/// Casts a 64-bit signed integer value into a 64-bit integer vector.\n"
23351"///\n"
23352"/// \\headerfile <x86intrin.h>\n"
23353"///\n"
23354"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23355"///\n"
23356"/// \\param __i\n"
23357"/// A 64-bit signed integer.\n"
23358"/// \\returns A 64-bit integer vector containing the same bitwise pattern as the\n"
23359"/// parameter.\n"
23360"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23361"_mm_cvtsi64_m64(long long __i)\n"
23362"{\n"
23363" return (__m64)__i;\n"
23364"}\n"
23365"\n"
23366"/// Casts a 64-bit integer vector into a 64-bit signed integer value.\n"
23367"///\n"
23368"/// \\headerfile <x86intrin.h>\n"
23369"///\n"
23370"/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n"
23371"///\n"
23372"/// \\param __m\n"
23373"/// A 64-bit integer vector.\n"
23374"/// \\returns A 64-bit signed integer containing the same bitwise pattern as the\n"
23375"/// parameter.\n"
23376"static __inline__ long long __DEFAULT_FN_ATTRS\n"
23377"_mm_cvtm64_si64(__m64 __m)\n"
23378"{\n"
23379" return (long long)__m;\n"
23380"}\n"
23381"\n"
23382"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23383"/// parameters of [4 x i16] into 8-bit signed integer values, and constructs\n"
23384"/// a 64-bit integer vector of [8 x i8] as the result. Positive values\n"
23385"/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80\n"
23386"/// are saturated to 0x80.\n"
23387"///\n"
23388"/// \\headerfile <x86intrin.h>\n"
23389"///\n"
23390"/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.\n"
23391"///\n"
23392"/// \\param __m1\n"
23393"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23394"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23395"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23396"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23397"/// [4 x i8] values are written to the lower 32 bits of the result.\n"
23398"/// \\param __m2\n"
23399"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23400"/// 16-bit signed integer and is converted to an 8-bit signed integer with\n"
23401"/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n"
23402"/// Negative values less than 0x80 are saturated to 0x80. The converted\n"
23403"/// [4 x i8] values are written to the upper 32 bits of the result.\n"
23404"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23405"/// values.\n"
23406"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23407"_mm_packs_pi16(__m64 __m1, __m64 __m2)\n"
23408"{\n"
23409" return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);\n"
23410"}\n"
23411"\n"
23412"/// Converts 32-bit signed integers from both 64-bit integer vector\n"
23413"/// parameters of [2 x i32] into 16-bit signed integer values, and constructs\n"
23414"/// a 64-bit integer vector of [4 x i16] as the result. Positive values\n"
23415"/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than\n"
23416"/// 0x8000 are saturated to 0x8000.\n"
23417"///\n"
23418"/// \\headerfile <x86intrin.h>\n"
23419"///\n"
23420"/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.\n"
23421"///\n"
23422"/// \\param __m1\n"
23423"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23424"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23425"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23426"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23427"/// [2 x i16] values are written to the lower 32 bits of the result.\n"
23428"/// \\param __m2\n"
23429"/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n"
23430"/// 32-bit signed integer and is converted to a 16-bit signed integer with\n"
23431"/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n"
23432"/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n"
23433"/// [2 x i16] values are written to the upper 32 bits of the result.\n"
23434"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
23435"/// values.\n"
23436"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23437"_mm_packs_pi32(__m64 __m1, __m64 __m2)\n"
23438"{\n"
23439" return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);\n"
23440"}\n"
23441"\n"
23442"/// Converts 16-bit signed integers from both 64-bit integer vector\n"
23443"/// parameters of [4 x i16] into 8-bit unsigned integer values, and\n"
23444"/// constructs a 64-bit integer vector of [8 x i8] as the result. Values\n"
23445"/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated\n"
23446"/// to 0.\n"
23447"///\n"
23448"/// \\headerfile <x86intrin.h>\n"
23449"///\n"
23450"/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.\n"
23451"///\n"
23452"/// \\param __m1\n"
23453"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23454"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23455"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23456"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23457"/// the lower 32 bits of the result.\n"
23458"/// \\param __m2\n"
23459"/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n"
23460"/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n"
23461"/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n"
23462"/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n"
23463"/// the upper 32 bits of the result.\n"
23464"/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n"
23465"/// values.\n"
23466"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23467"_mm_packs_pu16(__m64 __m1, __m64 __m2)\n"
23468"{\n"
23469" return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);\n"
23470"}\n"
23471"\n"
23472"/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23473"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23474"///\n"
23475"/// \\headerfile <x86intrin.h>\n"
23476"///\n"
23477"/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.\n"
23478"///\n"
23479"/// \\param __m1\n"
23480"/// A 64-bit integer vector of [8 x i8]. \\n\n"
23481"/// Bits [39:32] are written to bits [7:0] of the result. \\n\n"
23482"/// Bits [47:40] are written to bits [23:16] of the result. \\n\n"
23483"/// Bits [55:48] are written to bits [39:32] of the result. \\n\n"
23484"/// Bits [63:56] are written to bits [55:48] of the result.\n"
23485"/// \\param __m2\n"
23486"/// A 64-bit integer vector of [8 x i8].\n"
23487"/// Bits [39:32] are written to bits [15:8] of the result. \\n\n"
23488"/// Bits [47:40] are written to bits [31:24] of the result. \\n\n"
23489"/// Bits [55:48] are written to bits [47:40] of the result. \\n\n"
23490"/// Bits [63:56] are written to bits [63:56] of the result.\n"
23491"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23492"/// values.\n"
23493"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23494"_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)\n"
23495"{\n"
23496" return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);\n"
23497"}\n"
23498"\n"
23499"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23500"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23501"///\n"
23502"/// \\headerfile <x86intrin.h>\n"
23503"///\n"
23504"/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.\n"
23505"///\n"
23506"/// \\param __m1\n"
23507"/// A 64-bit integer vector of [4 x i16].\n"
23508"/// Bits [47:32] are written to bits [15:0] of the result. \\n\n"
23509"/// Bits [63:48] are written to bits [47:32] of the result.\n"
23510"/// \\param __m2\n"
23511"/// A 64-bit integer vector of [4 x i16].\n"
23512"/// Bits [47:32] are written to bits [31:16] of the result. \\n\n"
23513"/// Bits [63:48] are written to bits [63:48] of the result.\n"
23514"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23515"/// values.\n"
23516"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23517"_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)\n"
23518"{\n"
23519" return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);\n"
23520"}\n"
23521"\n"
23522"/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n"
23523"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23524"///\n"
23525"/// \\headerfile <x86intrin.h>\n"
23526"///\n"
23527"/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.\n"
23528"///\n"
23529"/// \\param __m1\n"
23530"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23531"/// the lower 32 bits of the result.\n"
23532"/// \\param __m2\n"
23533"/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n"
23534"/// the upper 32 bits of the result.\n"
23535"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23536"/// values.\n"
23537"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23538"_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)\n"
23539"{\n"
23540" return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);\n"
23541"}\n"
23542"\n"
23543"/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]\n"
23544"/// and interleaves them into a 64-bit integer vector of [8 x i8].\n"
23545"///\n"
23546"/// \\headerfile <x86intrin.h>\n"
23547"///\n"
23548"/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.\n"
23549"///\n"
23550"/// \\param __m1\n"
23551"/// A 64-bit integer vector of [8 x i8].\n"
23552"/// Bits [7:0] are written to bits [7:0] of the result. \\n\n"
23553"/// Bits [15:8] are written to bits [23:16] of the result. \\n\n"
23554"/// Bits [23:16] are written to bits [39:32] of the result. \\n\n"
23555"/// Bits [31:24] are written to bits [55:48] of the result.\n"
23556"/// \\param __m2\n"
23557"/// A 64-bit integer vector of [8 x i8].\n"
23558"/// Bits [7:0] are written to bits [15:8] of the result. \\n\n"
23559"/// Bits [15:8] are written to bits [31:24] of the result. \\n\n"
23560"/// Bits [23:16] are written to bits [47:40] of the result. \\n\n"
23561"/// Bits [31:24] are written to bits [63:56] of the result.\n"
23562"/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n"
23563"/// values.\n"
23564"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23565"_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)\n"
23566"{\n"
23567" return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);\n"
23568"}\n"
23569"\n"
23570"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23571"/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n"
23572"///\n"
23573"/// \\headerfile <x86intrin.h>\n"
23574"///\n"
23575"/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.\n"
23576"///\n"
23577"/// \\param __m1\n"
23578"/// A 64-bit integer vector of [4 x i16].\n"
23579"/// Bits [15:0] are written to bits [15:0] of the result. \\n\n"
23580"/// Bits [31:16] are written to bits [47:32] of the result.\n"
23581"/// \\param __m2\n"
23582"/// A 64-bit integer vector of [4 x i16].\n"
23583"/// Bits [15:0] are written to bits [31:16] of the result. \\n\n"
23584"/// Bits [31:16] are written to bits [63:48] of the result.\n"
23585"/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n"
23586"/// values.\n"
23587"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23588"_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)\n"
23589"{\n"
23590" return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);\n"
23591"}\n"
23592"\n"
23593"/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n"
23594"/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n"
23595"///\n"
23596"/// \\headerfile <x86intrin.h>\n"
23597"///\n"
23598"/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.\n"
23599"///\n"
23600"/// \\param __m1\n"
23601"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23602"/// the lower 32 bits of the result.\n"
23603"/// \\param __m2\n"
23604"/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n"
23605"/// the upper 32 bits of the result.\n"
23606"/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n"
23607"/// values.\n"
23608"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23609"_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)\n"
23610"{\n"
23611" return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);\n"
23612"}\n"
23613"\n"
23614"/// Adds each 8-bit integer element of the first 64-bit integer vector\n"
23615"/// of [8 x i8] to the corresponding 8-bit integer element of the second\n"
23616"/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are\n"
23617"/// packed into a 64-bit integer vector of [8 x i8].\n"
23618"///\n"
23619"/// \\headerfile <x86intrin.h>\n"
23620"///\n"
23621"/// This intrinsic corresponds to the <c> PADDB </c> instruction.\n"
23622"///\n"
23623"/// \\param __m1\n"
23624"/// A 64-bit integer vector of [8 x i8].\n"
23625"/// \\param __m2\n"
23626"/// A 64-bit integer vector of [8 x i8].\n"
23627"/// \\returns A 64-bit integer vector of [8 x i8] containing the sums of both\n"
23628"/// parameters.\n"
23629"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23630"_mm_add_pi8(__m64 __m1, __m64 __m2)\n"
23631"{\n"
23632" return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);\n"
23633"}\n"
23634"\n"
23635"/// Adds each 16-bit integer element of the first 64-bit integer vector\n"
23636"/// of [4 x i16] to the corresponding 16-bit integer element of the second\n"
23637"/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are\n"
23638"/// packed into a 64-bit integer vector of [4 x i16].\n"
23639"///\n"
23640"/// \\headerfile <x86intrin.h>\n"
23641"///\n"
23642"/// This intrinsic corresponds to the <c> PADDW </c> instruction.\n"
23643"///\n"
23644"/// \\param __m1\n"
23645"/// A 64-bit integer vector of [4 x i16].\n"
23646"/// \\param __m2\n"
23647"/// A 64-bit integer vector of [4 x i16].\n"
23648"/// \\returns A 64-bit integer vector of [4 x i16] containing the sums of both\n"
23649"/// parameters.\n"
23650"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23651"_mm_add_pi16(__m64 __m1, __m64 __m2)\n"
23652"{\n"
23653" return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);\n"
23654"}\n"
23655"\n"
23656"/// Adds each 32-bit integer element of the first 64-bit integer vector\n"
23657"/// of [2 x i32] to the corresponding 32-bit integer element of the second\n"
23658"/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are\n"
23659"/// packed into a 64-bit integer vector of [2 x i32].\n"
23660"///\n"
23661"/// \\headerfile <x86intrin.h>\n"
23662"///\n"
23663"/// This intrinsic corresponds to the <c> PADDD </c> instruction.\n"
23664"///\n"
23665"/// \\param __m1\n"
23666"/// A 64-bit integer vector of [2 x i32].\n"
23667"/// \\param __m2\n"
23668"/// A 64-bit integer vector of [2 x i32].\n"
23669"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of both\n"
23670"/// parameters.\n"
23671"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23672"_mm_add_pi32(__m64 __m1, __m64 __m2)\n"
23673"{\n"
23674" return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);\n"
23675"}\n"
23676"\n"
23677"/// Adds each 8-bit signed integer element of the first 64-bit integer\n"
23678"/// vector of [8 x i8] to the corresponding 8-bit signed integer element of\n"
23679"/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than\n"
23680"/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to\n"
23681"/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].\n"
23682"///\n"
23683"/// \\headerfile <x86intrin.h>\n"
23684"///\n"
23685"/// This intrinsic corresponds to the <c> PADDSB </c> instruction.\n"
23686"///\n"
23687"/// \\param __m1\n"
23688"/// A 64-bit integer vector of [8 x i8].\n"
23689"/// \\param __m2\n"
23690"/// A 64-bit integer vector of [8 x i8].\n"
23691"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated sums\n"
23692"/// of both parameters.\n"
23693"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23694"_mm_adds_pi8(__m64 __m1, __m64 __m2)\n"
23695"{\n"
23696" return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);\n"
23697"}\n"
23698"\n"
23699"/// Adds each 16-bit signed integer element of the first 64-bit integer\n"
23700"/// vector of [4 x i16] to the corresponding 16-bit signed integer element of\n"
23701"/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than\n"
23702"/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are\n"
23703"/// saturated to 0x8000. The results are packed into a 64-bit integer vector\n"
23704"/// of [4 x i16].\n"
23705"///\n"
23706"/// \\headerfile <x86intrin.h>\n"
23707"///\n"
23708"/// This intrinsic corresponds to the <c> PADDSW </c> instruction.\n"
23709"///\n"
23710"/// \\param __m1\n"
23711"/// A 64-bit integer vector of [4 x i16].\n"
23712"/// \\param __m2\n"
23713"/// A 64-bit integer vector of [4 x i16].\n"
23714"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated sums\n"
23715"/// of both parameters.\n"
23716"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23717"_mm_adds_pi16(__m64 __m1, __m64 __m2)\n"
23718"{\n"
23719" return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);\n"
23720"}\n"
23721"\n"
23722"/// Adds each 8-bit unsigned integer element of the first 64-bit integer\n"
23723"/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of\n"
23724"/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are\n"
23725"/// saturated to 0xFF. The results are packed into a 64-bit integer vector of\n"
23726"/// [8 x i8].\n"
23727"///\n"
23728"/// \\headerfile <x86intrin.h>\n"
23729"///\n"
23730"/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.\n"
23731"///\n"
23732"/// \\param __m1\n"
23733"/// A 64-bit integer vector of [8 x i8].\n"
23734"/// \\param __m2\n"
23735"/// A 64-bit integer vector of [8 x i8].\n"
23736"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23737"/// unsigned sums of both parameters.\n"
23738"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23739"_mm_adds_pu8(__m64 __m1, __m64 __m2)\n"
23740"{\n"
23741" return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);\n"
23742"}\n"
23743"\n"
23744"/// Adds each 16-bit unsigned integer element of the first 64-bit integer\n"
23745"/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element\n"
23746"/// of the second 64-bit integer vector of [4 x i16]. Sums greater than\n"
23747"/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit\n"
23748"/// integer vector of [4 x i16].\n"
23749"///\n"
23750"/// \\headerfile <x86intrin.h>\n"
23751"///\n"
23752"/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.\n"
23753"///\n"
23754"/// \\param __m1\n"
23755"/// A 64-bit integer vector of [4 x i16].\n"
23756"/// \\param __m2\n"
23757"/// A 64-bit integer vector of [4 x i16].\n"
23758"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23759"/// unsigned sums of both parameters.\n"
23760"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23761"_mm_adds_pu16(__m64 __m1, __m64 __m2)\n"
23762"{\n"
23763" return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);\n"
23764"}\n"
23765"\n"
23766"/// Subtracts each 8-bit integer element of the second 64-bit integer\n"
23767"/// vector of [8 x i8] from the corresponding 8-bit integer element of the\n"
23768"/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results\n"
23769"/// are packed into a 64-bit integer vector of [8 x i8].\n"
23770"///\n"
23771"/// \\headerfile <x86intrin.h>\n"
23772"///\n"
23773"/// This intrinsic corresponds to the <c> PSUBB </c> instruction.\n"
23774"///\n"
23775"/// \\param __m1\n"
23776"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23777"/// \\param __m2\n"
23778"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23779"/// \\returns A 64-bit integer vector of [8 x i8] containing the differences of\n"
23780"/// both parameters.\n"
23781"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23782"_mm_sub_pi8(__m64 __m1, __m64 __m2)\n"
23783"{\n"
23784" return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);\n"
23785"}\n"
23786"\n"
23787"/// Subtracts each 16-bit integer element of the second 64-bit integer\n"
23788"/// vector of [4 x i16] from the corresponding 16-bit integer element of the\n"
23789"/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the\n"
23790"/// results are packed into a 64-bit integer vector of [4 x i16].\n"
23791"///\n"
23792"/// \\headerfile <x86intrin.h>\n"
23793"///\n"
23794"/// This intrinsic corresponds to the <c> PSUBW </c> instruction.\n"
23795"///\n"
23796"/// \\param __m1\n"
23797"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23798"/// \\param __m2\n"
23799"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23800"/// \\returns A 64-bit integer vector of [4 x i16] containing the differences of\n"
23801"/// both parameters.\n"
23802"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23803"_mm_sub_pi16(__m64 __m1, __m64 __m2)\n"
23804"{\n"
23805" return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);\n"
23806"}\n"
23807"\n"
23808"/// Subtracts each 32-bit integer element of the second 64-bit integer\n"
23809"/// vector of [2 x i32] from the corresponding 32-bit integer element of the\n"
23810"/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the\n"
23811"/// results are packed into a 64-bit integer vector of [2 x i32].\n"
23812"///\n"
23813"/// \\headerfile <x86intrin.h>\n"
23814"///\n"
23815"/// This intrinsic corresponds to the <c> PSUBD </c> instruction.\n"
23816"///\n"
23817"/// \\param __m1\n"
23818"/// A 64-bit integer vector of [2 x i32] containing the minuends.\n"
23819"/// \\param __m2\n"
23820"/// A 64-bit integer vector of [2 x i32] containing the subtrahends.\n"
23821"/// \\returns A 64-bit integer vector of [2 x i32] containing the differences of\n"
23822"/// both parameters.\n"
23823"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23824"_mm_sub_pi32(__m64 __m1, __m64 __m2)\n"
23825"{\n"
23826" return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);\n"
23827"}\n"
23828"\n"
23829"/// Subtracts each 8-bit signed integer element of the second 64-bit\n"
23830"/// integer vector of [8 x i8] from the corresponding 8-bit signed integer\n"
23831"/// element of the first 64-bit integer vector of [8 x i8]. Positive results\n"
23832"/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80\n"
23833"/// are saturated to 0x80. The results are packed into a 64-bit integer\n"
23834"/// vector of [8 x i8].\n"
23835"///\n"
23836"/// \\headerfile <x86intrin.h>\n"
23837"///\n"
23838"/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.\n"
23839"///\n"
23840"/// \\param __m1\n"
23841"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23842"/// \\param __m2\n"
23843"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23844"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23845"/// differences of both parameters.\n"
23846"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23847"_mm_subs_pi8(__m64 __m1, __m64 __m2)\n"
23848"{\n"
23849" return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);\n"
23850"}\n"
23851"\n"
23852"/// Subtracts each 16-bit signed integer element of the second 64-bit\n"
23853"/// integer vector of [4 x i16] from the corresponding 16-bit signed integer\n"
23854"/// element of the first 64-bit integer vector of [4 x i16]. Positive results\n"
23855"/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than\n"
23856"/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit\n"
23857"/// integer vector of [4 x i16].\n"
23858"///\n"
23859"/// \\headerfile <x86intrin.h>\n"
23860"///\n"
23861"/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.\n"
23862"///\n"
23863"/// \\param __m1\n"
23864"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23865"/// \\param __m2\n"
23866"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23867"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23868"/// differences of both parameters.\n"
23869"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23870"_mm_subs_pi16(__m64 __m1, __m64 __m2)\n"
23871"{\n"
23872" return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);\n"
23873"}\n"
23874"\n"
23875"/// Subtracts each 8-bit unsigned integer element of the second 64-bit\n"
23876"/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer\n"
23877"/// element of the first 64-bit integer vector of [8 x i8].\n"
23878"///\n"
23879"/// If an element of the first vector is less than the corresponding element\n"
23880"/// of the second vector, the result is saturated to 0. The results are\n"
23881"/// packed into a 64-bit integer vector of [8 x i8].\n"
23882"///\n"
23883"/// \\headerfile <x86intrin.h>\n"
23884"///\n"
23885"/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.\n"
23886"///\n"
23887"/// \\param __m1\n"
23888"/// A 64-bit integer vector of [8 x i8] containing the minuends.\n"
23889"/// \\param __m2\n"
23890"/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n"
23891"/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n"
23892"/// differences of both parameters.\n"
23893"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23894"_mm_subs_pu8(__m64 __m1, __m64 __m2)\n"
23895"{\n"
23896" return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);\n"
23897"}\n"
23898"\n"
23899"/// Subtracts each 16-bit unsigned integer element of the second 64-bit\n"
23900"/// integer vector of [4 x i16] from the corresponding 16-bit unsigned\n"
23901"/// integer element of the first 64-bit integer vector of [4 x i16].\n"
23902"///\n"
23903"/// If an element of the first vector is less than the corresponding element\n"
23904"/// of the second vector, the result is saturated to 0. The results are\n"
23905"/// packed into a 64-bit integer vector of [4 x i16].\n"
23906"///\n"
23907"/// \\headerfile <x86intrin.h>\n"
23908"///\n"
23909"/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.\n"
23910"///\n"
23911"/// \\param __m1\n"
23912"/// A 64-bit integer vector of [4 x i16] containing the minuends.\n"
23913"/// \\param __m2\n"
23914"/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n"
23915"/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n"
23916"/// differences of both parameters.\n"
23917"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23918"_mm_subs_pu16(__m64 __m1, __m64 __m2)\n"
23919"{\n"
23920" return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);\n"
23921"}\n"
23922"\n"
23923"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23924"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23925"/// element of the second 64-bit integer vector of [4 x i16] and get four\n"
23926"/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.\n"
23927"/// The lower 32 bits of these two sums are packed into a 64-bit integer\n"
23928"/// vector of [2 x i32].\n"
23929"///\n"
23930"/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]\n"
23931"/// of both parameters are multiplied, and the sum of both results is written\n"
23932"/// to bits [31:0] of the result.\n"
23933"///\n"
23934"/// \\headerfile <x86intrin.h>\n"
23935"///\n"
23936"/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.\n"
23937"///\n"
23938"/// \\param __m1\n"
23939"/// A 64-bit integer vector of [4 x i16].\n"
23940"/// \\param __m2\n"
23941"/// A 64-bit integer vector of [4 x i16].\n"
23942"/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of\n"
23943"/// products of both parameters.\n"
23944"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23945"_mm_madd_pi16(__m64 __m1, __m64 __m2)\n"
23946"{\n"
23947" return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);\n"
23948"}\n"
23949"\n"
23950"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23951"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23952"/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper\n"
23953"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23954"///\n"
23955"/// \\headerfile <x86intrin.h>\n"
23956"///\n"
23957"/// This intrinsic corresponds to the <c> PMULHW </c> instruction.\n"
23958"///\n"
23959"/// \\param __m1\n"
23960"/// A 64-bit integer vector of [4 x i16].\n"
23961"/// \\param __m2\n"
23962"/// A 64-bit integer vector of [4 x i16].\n"
23963"/// \\returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits\n"
23964"/// of the products of both parameters.\n"
23965"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23966"_mm_mulhi_pi16(__m64 __m1, __m64 __m2)\n"
23967"{\n"
23968" return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);\n"
23969"}\n"
23970"\n"
23971"/// Multiplies each 16-bit signed integer element of the first 64-bit\n"
23972"/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n"
23973"/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower\n"
23974"/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n"
23975"///\n"
23976"/// \\headerfile <x86intrin.h>\n"
23977"///\n"
23978"/// This intrinsic corresponds to the <c> PMULLW </c> instruction.\n"
23979"///\n"
23980"/// \\param __m1\n"
23981"/// A 64-bit integer vector of [4 x i16].\n"
23982"/// \\param __m2\n"
23983"/// A 64-bit integer vector of [4 x i16].\n"
23984"/// \\returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits\n"
23985"/// of the products of both parameters.\n"
23986"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
23987"_mm_mullo_pi16(__m64 __m1, __m64 __m2)\n"
23988"{\n"
23989" return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);\n"
23990"}\n"
23991"\n"
23992"/// Left-shifts each 16-bit signed integer element of the first\n"
23993"/// parameter, which is a 64-bit integer vector of [4 x i16], by the number\n"
23994"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
23995"/// lower 16 bits of the results are packed into a 64-bit integer vector of\n"
23996"/// [4 x i16].\n"
23997"///\n"
23998"/// \\headerfile <x86intrin.h>\n"
23999"///\n"
24000"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
24001"///\n"
24002"/// \\param __m\n"
24003"/// A 64-bit integer vector of [4 x i16].\n"
24004"/// \\param __count\n"
24005"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24006"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
24007"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
24008"/// 0.\n"
24009"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24010"_mm_sll_pi16(__m64 __m, __m64 __count)\n"
24011"{\n"
24012" return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);\n"
24013"}\n"
24014"\n"
24015"/// Left-shifts each 16-bit signed integer element of a 64-bit integer\n"
24016"/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24017"/// The lower 16 bits of the results are packed into a 64-bit integer vector\n"
24018"/// of [4 x i16].\n"
24019"///\n"
24020"/// \\headerfile <x86intrin.h>\n"
24021"///\n"
24022"/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n"
24023"///\n"
24024"/// \\param __m\n"
24025"/// A 64-bit integer vector of [4 x i16].\n"
24026"/// \\param __count\n"
24027"/// A 32-bit integer value.\n"
24028"/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n"
24029"/// values. If \\a __count is greater or equal to 16, the result is set to all\n"
24030"/// 0.\n"
24031"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24032"_mm_slli_pi16(__m64 __m, int __count)\n"
24033"{\n"
24034" return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);\n"
24035"}\n"
24036"\n"
24037"/// Left-shifts each 32-bit signed integer element of the first\n"
24038"/// parameter, which is a 64-bit integer vector of [2 x i32], by the number\n"
24039"/// of bits specified by the second parameter, which is a 64-bit integer. The\n"
24040"/// lower 32 bits of the results are packed into a 64-bit integer vector of\n"
24041"/// [2 x i32].\n"
24042"///\n"
24043"/// \\headerfile <x86intrin.h>\n"
24044"///\n"
24045"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
24046"///\n"
24047"/// \\param __m\n"
24048"/// A 64-bit integer vector of [2 x i32].\n"
24049"/// \\param __count\n"
24050"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24051"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
24052"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
24053"/// 0.\n"
24054"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24055"_mm_sll_pi32(__m64 __m, __m64 __count)\n"
24056"{\n"
24057" return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);\n"
24058"}\n"
24059"\n"
24060"/// Left-shifts each 32-bit signed integer element of a 64-bit integer\n"
24061"/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24062"/// The lower 32 bits of the results are packed into a 64-bit integer vector\n"
24063"/// of [2 x i32].\n"
24064"///\n"
24065"/// \\headerfile <x86intrin.h>\n"
24066"///\n"
24067"/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n"
24068"///\n"
24069"/// \\param __m\n"
24070"/// A 64-bit integer vector of [2 x i32].\n"
24071"/// \\param __count\n"
24072"/// A 32-bit integer value.\n"
24073"/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n"
24074"/// values. If \\a __count is greater or equal to 32, the result is set to all\n"
24075"/// 0.\n"
24076"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24077"_mm_slli_pi32(__m64 __m, int __count)\n"
24078"{\n"
24079" return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);\n"
24080"}\n"
24081"\n"
24082"/// Left-shifts the first 64-bit integer parameter by the number of bits\n"
24083"/// specified by the second 64-bit integer parameter. The lower 64 bits of\n"
24084"/// result are returned.\n"
24085"///\n"
24086"/// \\headerfile <x86intrin.h>\n"
24087"///\n"
24088"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
24089"///\n"
24090"/// \\param __m\n"
24091"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24092"/// \\param __count\n"
24093"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24094"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
24095"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
24096"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24097"_mm_sll_si64(__m64 __m, __m64 __count)\n"
24098"{\n"
24099" return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);\n"
24100"}\n"
24101"\n"
24102"/// Left-shifts the first parameter, which is a 64-bit integer, by the\n"
24103"/// number of bits specified by the second parameter, which is a 32-bit\n"
24104"/// integer. The lower 64 bits of result are returned.\n"
24105"///\n"
24106"/// \\headerfile <x86intrin.h>\n"
24107"///\n"
24108"/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n"
24109"///\n"
24110"/// \\param __m\n"
24111"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24112"/// \\param __count\n"
24113"/// A 32-bit integer value.\n"
24114"/// \\returns A 64-bit integer vector containing the left-shifted value. If\n"
24115"/// \\a __count is greater or equal to 64, the result is set to 0.\n"
24116"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24117"_mm_slli_si64(__m64 __m, int __count)\n"
24118"{\n"
24119" return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);\n"
24120"}\n"
24121"\n"
24122"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24123"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24124"/// specified by the second parameter, which is a 64-bit integer.\n"
24125"///\n"
24126"/// High-order bits are filled with the sign bit of the initial value of each\n"
24127"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24128"/// vector of [4 x i16].\n"
24129"///\n"
24130"/// \\headerfile <x86intrin.h>\n"
24131"///\n"
24132"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24133"///\n"
24134"/// \\param __m\n"
24135"/// A 64-bit integer vector of [4 x i16].\n"
24136"/// \\param __count\n"
24137"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24138"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24139"/// values.\n"
24140"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24141"_mm_sra_pi16(__m64 __m, __m64 __count)\n"
24142"{\n"
24143" return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);\n"
24144"}\n"
24145"\n"
24146"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24147"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24148"///\n"
24149"/// High-order bits are filled with the sign bit of the initial value of each\n"
24150"/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n"
24151"/// vector of [4 x i16].\n"
24152"///\n"
24153"/// \\headerfile <x86intrin.h>\n"
24154"///\n"
24155"/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n"
24156"///\n"
24157"/// \\param __m\n"
24158"/// A 64-bit integer vector of [4 x i16].\n"
24159"/// \\param __count\n"
24160"/// A 32-bit integer value.\n"
24161"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24162"/// values.\n"
24163"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24164"_mm_srai_pi16(__m64 __m, int __count)\n"
24165"{\n"
24166" return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);\n"
24167"}\n"
24168"\n"
24169"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24170"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24171"/// specified by the second parameter, which is a 64-bit integer.\n"
24172"///\n"
24173"/// High-order bits are filled with the sign bit of the initial value of each\n"
24174"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24175"/// vector of [2 x i32].\n"
24176"///\n"
24177"/// \\headerfile <x86intrin.h>\n"
24178"///\n"
24179"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24180"///\n"
24181"/// \\param __m\n"
24182"/// A 64-bit integer vector of [2 x i32].\n"
24183"/// \\param __count\n"
24184"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24185"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24186"/// values.\n"
24187"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24188"_mm_sra_pi32(__m64 __m, __m64 __count)\n"
24189"{\n"
24190" return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);\n"
24191"}\n"
24192"\n"
24193"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24194"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24195"///\n"
24196"/// High-order bits are filled with the sign bit of the initial value of each\n"
24197"/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n"
24198"/// vector of [2 x i32].\n"
24199"///\n"
24200"/// \\headerfile <x86intrin.h>\n"
24201"///\n"
24202"/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n"
24203"///\n"
24204"/// \\param __m\n"
24205"/// A 64-bit integer vector of [2 x i32].\n"
24206"/// \\param __count\n"
24207"/// A 32-bit integer value.\n"
24208"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24209"/// values.\n"
24210"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24211"_mm_srai_pi32(__m64 __m, int __count)\n"
24212"{\n"
24213" return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);\n"
24214"}\n"
24215"\n"
24216"/// Right-shifts each 16-bit integer element of the first parameter,\n"
24217"/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n"
24218"/// specified by the second parameter, which is a 64-bit integer.\n"
24219"///\n"
24220"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24221"/// integer vector of [4 x i16].\n"
24222"///\n"
24223"/// \\headerfile <x86intrin.h>\n"
24224"///\n"
24225"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24226"///\n"
24227"/// \\param __m\n"
24228"/// A 64-bit integer vector of [4 x i16].\n"
24229"/// \\param __count\n"
24230"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24231"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24232"/// values.\n"
24233"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24234"_mm_srl_pi16(__m64 __m, __m64 __count)\n"
24235"{\n"
24236" return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);\n"
24237"}\n"
24238"\n"
24239"/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n"
24240"/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n"
24241"///\n"
24242"/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n"
24243"/// integer vector of [4 x i16].\n"
24244"///\n"
24245"/// \\headerfile <x86intrin.h>\n"
24246"///\n"
24247"/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n"
24248"///\n"
24249"/// \\param __m\n"
24250"/// A 64-bit integer vector of [4 x i16].\n"
24251"/// \\param __count\n"
24252"/// A 32-bit integer value.\n"
24253"/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n"
24254"/// values.\n"
24255"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24256"_mm_srli_pi16(__m64 __m, int __count)\n"
24257"{\n"
24258" return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);\n"
24259"}\n"
24260"\n"
24261"/// Right-shifts each 32-bit integer element of the first parameter,\n"
24262"/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n"
24263"/// specified by the second parameter, which is a 64-bit integer.\n"
24264"///\n"
24265"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24266"/// integer vector of [2 x i32].\n"
24267"///\n"
24268"/// \\headerfile <x86intrin.h>\n"
24269"///\n"
24270"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24271"///\n"
24272"/// \\param __m\n"
24273"/// A 64-bit integer vector of [2 x i32].\n"
24274"/// \\param __count\n"
24275"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24276"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24277"/// values.\n"
24278"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24279"_mm_srl_pi32(__m64 __m, __m64 __count)\n"
24280"{\n"
24281" return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);\n"
24282"}\n"
24283"\n"
24284"/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n"
24285"/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n"
24286"///\n"
24287"/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n"
24288"/// integer vector of [2 x i32].\n"
24289"///\n"
24290"/// \\headerfile <x86intrin.h>\n"
24291"///\n"
24292"/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n"
24293"///\n"
24294"/// \\param __m\n"
24295"/// A 64-bit integer vector of [2 x i32].\n"
24296"/// \\param __count\n"
24297"/// A 32-bit integer value.\n"
24298"/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n"
24299"/// values.\n"
24300"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24301"_mm_srli_pi32(__m64 __m, int __count)\n"
24302"{\n"
24303" return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);\n"
24304"}\n"
24305"\n"
24306"/// Right-shifts the first 64-bit integer parameter by the number of bits\n"
24307"/// specified by the second 64-bit integer parameter.\n"
24308"///\n"
24309"/// High-order bits are cleared.\n"
24310"///\n"
24311"/// \\headerfile <x86intrin.h>\n"
24312"///\n"
24313"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24314"///\n"
24315"/// \\param __m\n"
24316"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24317"/// \\param __count\n"
24318"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24319"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24320"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24321"_mm_srl_si64(__m64 __m, __m64 __count)\n"
24322"{\n"
24323" return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);\n"
24324"}\n"
24325"\n"
24326"/// Right-shifts the first parameter, which is a 64-bit integer, by the\n"
24327"/// number of bits specified by the second parameter, which is a 32-bit\n"
24328"/// integer.\n"
24329"///\n"
24330"/// High-order bits are cleared.\n"
24331"///\n"
24332"/// \\headerfile <x86intrin.h>\n"
24333"///\n"
24334"/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n"
24335"///\n"
24336"/// \\param __m\n"
24337"/// A 64-bit integer vector interpreted as a single 64-bit integer.\n"
24338"/// \\param __count\n"
24339"/// A 32-bit integer value.\n"
24340"/// \\returns A 64-bit integer vector containing the right-shifted value.\n"
24341"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24342"_mm_srli_si64(__m64 __m, int __count)\n"
24343"{\n"
24344" return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);\n"
24345"}\n"
24346"\n"
24347"/// Performs a bitwise AND of two 64-bit integer vectors.\n"
24348"///\n"
24349"/// \\headerfile <x86intrin.h>\n"
24350"///\n"
24351"/// This intrinsic corresponds to the <c> PAND </c> instruction.\n"
24352"///\n"
24353"/// \\param __m1\n"
24354"/// A 64-bit integer vector.\n"
24355"/// \\param __m2\n"
24356"/// A 64-bit integer vector.\n"
24357"/// \\returns A 64-bit integer vector containing the bitwise AND of both\n"
24358"/// parameters.\n"
24359"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24360"_mm_and_si64(__m64 __m1, __m64 __m2)\n"
24361"{\n"
24362" return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);\n"
24363"}\n"
24364"\n"
24365"/// Performs a bitwise NOT of the first 64-bit integer vector, and then\n"
24366"/// performs a bitwise AND of the intermediate result and the second 64-bit\n"
24367"/// integer vector.\n"
24368"///\n"
24369"/// \\headerfile <x86intrin.h>\n"
24370"///\n"
24371"/// This intrinsic corresponds to the <c> PANDN </c> instruction.\n"
24372"///\n"
24373"/// \\param __m1\n"
24374"/// A 64-bit integer vector. The one's complement of this parameter is used\n"
24375"/// in the bitwise AND.\n"
24376"/// \\param __m2\n"
24377"/// A 64-bit integer vector.\n"
24378"/// \\returns A 64-bit integer vector containing the bitwise AND of the second\n"
24379"/// parameter and the one's complement of the first parameter.\n"
24380"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24381"_mm_andnot_si64(__m64 __m1, __m64 __m2)\n"
24382"{\n"
24383" return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);\n"
24384"}\n"
24385"\n"
24386"/// Performs a bitwise OR of two 64-bit integer vectors.\n"
24387"///\n"
24388"/// \\headerfile <x86intrin.h>\n"
24389"///\n"
24390"/// This intrinsic corresponds to the <c> POR </c> instruction.\n"
24391"///\n"
24392"/// \\param __m1\n"
24393"/// A 64-bit integer vector.\n"
24394"/// \\param __m2\n"
24395"/// A 64-bit integer vector.\n"
24396"/// \\returns A 64-bit integer vector containing the bitwise OR of both\n"
24397"/// parameters.\n"
24398"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24399"_mm_or_si64(__m64 __m1, __m64 __m2)\n"
24400"{\n"
24401" return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);\n"
24402"}\n"
24403"\n"
24404"/// Performs a bitwise exclusive OR of two 64-bit integer vectors.\n"
24405"///\n"
24406"/// \\headerfile <x86intrin.h>\n"
24407"///\n"
24408"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24409"///\n"
24410"/// \\param __m1\n"
24411"/// A 64-bit integer vector.\n"
24412"/// \\param __m2\n"
24413"/// A 64-bit integer vector.\n"
24414"/// \\returns A 64-bit integer vector containing the bitwise exclusive OR of both\n"
24415"/// parameters.\n"
24416"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24417"_mm_xor_si64(__m64 __m1, __m64 __m2)\n"
24418"{\n"
24419" return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);\n"
24420"}\n"
24421"\n"
24422"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24423"/// [8 x i8] to determine if the element of the first vector is equal to the\n"
24424"/// corresponding element of the second vector.\n"
24425"///\n"
24426"/// The comparison yields 0 for false, 0xFF for true.\n"
24427"///\n"
24428"/// \\headerfile <x86intrin.h>\n"
24429"///\n"
24430"/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.\n"
24431"///\n"
24432"/// \\param __m1\n"
24433"/// A 64-bit integer vector of [8 x i8].\n"
24434"/// \\param __m2\n"
24435"/// A 64-bit integer vector of [8 x i8].\n"
24436"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24437"/// results.\n"
24438"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24439"_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)\n"
24440"{\n"
24441" return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);\n"
24442"}\n"
24443"\n"
24444"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24445"/// [4 x i16] to determine if the element of the first vector is equal to the\n"
24446"/// corresponding element of the second vector.\n"
24447"///\n"
24448"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24449"///\n"
24450"/// \\headerfile <x86intrin.h>\n"
24451"///\n"
24452"/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.\n"
24453"///\n"
24454"/// \\param __m1\n"
24455"/// A 64-bit integer vector of [4 x i16].\n"
24456"/// \\param __m2\n"
24457"/// A 64-bit integer vector of [4 x i16].\n"
24458"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24459"/// results.\n"
24460"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24461"_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)\n"
24462"{\n"
24463" return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);\n"
24464"}\n"
24465"\n"
24466"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24467"/// [2 x i32] to determine if the element of the first vector is equal to the\n"
24468"/// corresponding element of the second vector.\n"
24469"///\n"
24470"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24471"///\n"
24472"/// \\headerfile <x86intrin.h>\n"
24473"///\n"
24474"/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.\n"
24475"///\n"
24476"/// \\param __m1\n"
24477"/// A 64-bit integer vector of [2 x i32].\n"
24478"/// \\param __m2\n"
24479"/// A 64-bit integer vector of [2 x i32].\n"
24480"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24481"/// results.\n"
24482"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24483"_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)\n"
24484"{\n"
24485" return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);\n"
24486"}\n"
24487"\n"
24488"/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n"
24489"/// [8 x i8] to determine if the element of the first vector is greater than\n"
24490"/// the corresponding element of the second vector.\n"
24491"///\n"
24492"/// The comparison yields 0 for false, 0xFF for true.\n"
24493"///\n"
24494"/// \\headerfile <x86intrin.h>\n"
24495"///\n"
24496"/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.\n"
24497"///\n"
24498"/// \\param __m1\n"
24499"/// A 64-bit integer vector of [8 x i8].\n"
24500"/// \\param __m2\n"
24501"/// A 64-bit integer vector of [8 x i8].\n"
24502"/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n"
24503"/// results.\n"
24504"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24505"_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)\n"
24506"{\n"
24507" return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);\n"
24508"}\n"
24509"\n"
24510"/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n"
24511"/// [4 x i16] to determine if the element of the first vector is greater than\n"
24512"/// the corresponding element of the second vector.\n"
24513"///\n"
24514"/// The comparison yields 0 for false, 0xFFFF for true.\n"
24515"///\n"
24516"/// \\headerfile <x86intrin.h>\n"
24517"///\n"
24518"/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.\n"
24519"///\n"
24520"/// \\param __m1\n"
24521"/// A 64-bit integer vector of [4 x i16].\n"
24522"/// \\param __m2\n"
24523"/// A 64-bit integer vector of [4 x i16].\n"
24524"/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n"
24525"/// results.\n"
24526"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24527"_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)\n"
24528"{\n"
24529" return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);\n"
24530"}\n"
24531"\n"
24532"/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n"
24533"/// [2 x i32] to determine if the element of the first vector is greater than\n"
24534"/// the corresponding element of the second vector.\n"
24535"///\n"
24536"/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n"
24537"///\n"
24538"/// \\headerfile <x86intrin.h>\n"
24539"///\n"
24540"/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.\n"
24541"///\n"
24542"/// \\param __m1\n"
24543"/// A 64-bit integer vector of [2 x i32].\n"
24544"/// \\param __m2\n"
24545"/// A 64-bit integer vector of [2 x i32].\n"
24546"/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n"
24547"/// results.\n"
24548"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24549"_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)\n"
24550"{\n"
24551" return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);\n"
24552"}\n"
24553"\n"
24554"/// Constructs a 64-bit integer vector initialized to zero.\n"
24555"///\n"
24556"/// \\headerfile <x86intrin.h>\n"
24557"///\n"
24558"/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n"
24559"///\n"
24560"/// \\returns An initialized 64-bit integer vector with all elements set to zero.\n"
24561"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24562"_mm_setzero_si64(void)\n"
24563"{\n"
24564" return __extension__ (__m64){ 0LL };\n"
24565"}\n"
24566"\n"
24567"/// Constructs a 64-bit integer vector initialized with the specified\n"
24568"/// 32-bit integer values.\n"
24569"///\n"
24570"/// \\headerfile <x86intrin.h>\n"
24571"///\n"
24572"/// This intrinsic is a utility function and does not correspond to a specific\n"
24573"/// instruction.\n"
24574"///\n"
24575"/// \\param __i1\n"
24576"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24577"/// result.\n"
24578"/// \\param __i0\n"
24579"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24580"/// result.\n"
24581"/// \\returns An initialized 64-bit integer vector.\n"
24582"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24583"_mm_set_pi32(int __i1, int __i0)\n"
24584"{\n"
24585" return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);\n"
24586"}\n"
24587"\n"
24588"/// Constructs a 64-bit integer vector initialized with the specified\n"
24589"/// 16-bit integer values.\n"
24590"///\n"
24591"/// \\headerfile <x86intrin.h>\n"
24592"///\n"
24593"/// This intrinsic is a utility function and does not correspond to a specific\n"
24594"/// instruction.\n"
24595"///\n"
24596"/// \\param __s3\n"
24597"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24598"/// \\param __s2\n"
24599"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24600"/// \\param __s1\n"
24601"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24602"/// \\param __s0\n"
24603"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24604"/// \\returns An initialized 64-bit integer vector.\n"
24605"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24606"_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)\n"
24607"{\n"
24608" return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);\n"
24609"}\n"
24610"\n"
24611"/// Constructs a 64-bit integer vector initialized with the specified\n"
24612"/// 8-bit integer values.\n"
24613"///\n"
24614"/// \\headerfile <x86intrin.h>\n"
24615"///\n"
24616"/// This intrinsic is a utility function and does not correspond to a specific\n"
24617"/// instruction.\n"
24618"///\n"
24619"/// \\param __b7\n"
24620"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24621"/// \\param __b6\n"
24622"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24623"/// \\param __b5\n"
24624"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24625"/// \\param __b4\n"
24626"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24627"/// \\param __b3\n"
24628"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24629"/// \\param __b2\n"
24630"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24631"/// \\param __b1\n"
24632"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24633"/// \\param __b0\n"
24634"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24635"/// \\returns An initialized 64-bit integer vector.\n"
24636"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24637"_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,\n"
24638" char __b1, char __b0)\n"
24639"{\n"
24640" return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,\n"
24641" __b4, __b5, __b6, __b7);\n"
24642"}\n"
24643"\n"
24644"/// Constructs a 64-bit integer vector of [2 x i32], with each of the\n"
24645"/// 32-bit integer vector elements set to the specified 32-bit integer\n"
24646"/// value.\n"
24647"///\n"
24648"/// \\headerfile <x86intrin.h>\n"
24649"///\n"
24650"/// This intrinsic is a utility function and does not correspond to a specific\n"
24651"/// instruction.\n"
24652"///\n"
24653"/// \\param __i\n"
24654"/// A 32-bit integer value used to initialize each vector element of the\n"
24655"/// result.\n"
24656"/// \\returns An initialized 64-bit integer vector of [2 x i32].\n"
24657"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24658"_mm_set1_pi32(int __i)\n"
24659"{\n"
24660" return _mm_set_pi32(__i, __i);\n"
24661"}\n"
24662"\n"
24663"/// Constructs a 64-bit integer vector of [4 x i16], with each of the\n"
24664"/// 16-bit integer vector elements set to the specified 16-bit integer\n"
24665"/// value.\n"
24666"///\n"
24667"/// \\headerfile <x86intrin.h>\n"
24668"///\n"
24669"/// This intrinsic is a utility function and does not correspond to a specific\n"
24670"/// instruction.\n"
24671"///\n"
24672"/// \\param __w\n"
24673"/// A 16-bit integer value used to initialize each vector element of the\n"
24674"/// result.\n"
24675"/// \\returns An initialized 64-bit integer vector of [4 x i16].\n"
24676"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24677"_mm_set1_pi16(short __w)\n"
24678"{\n"
24679" return _mm_set_pi16(__w, __w, __w, __w);\n"
24680"}\n"
24681"\n"
24682"/// Constructs a 64-bit integer vector of [8 x i8], with each of the\n"
24683"/// 8-bit integer vector elements set to the specified 8-bit integer value.\n"
24684"///\n"
24685"/// \\headerfile <x86intrin.h>\n"
24686"///\n"
24687"/// This intrinsic is a utility function and does not correspond to a specific\n"
24688"/// instruction.\n"
24689"///\n"
24690"/// \\param __b\n"
24691"/// An 8-bit integer value used to initialize each vector element of the\n"
24692"/// result.\n"
24693"/// \\returns An initialized 64-bit integer vector of [8 x i8].\n"
24694"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24695"_mm_set1_pi8(char __b)\n"
24696"{\n"
24697" return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);\n"
24698"}\n"
24699"\n"
24700"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24701"/// the specified 32-bit integer values.\n"
24702"///\n"
24703"/// \\headerfile <x86intrin.h>\n"
24704"///\n"
24705"/// This intrinsic is a utility function and does not correspond to a specific\n"
24706"/// instruction.\n"
24707"///\n"
24708"/// \\param __i0\n"
24709"/// A 32-bit integer value used to initialize the lower 32 bits of the\n"
24710"/// result.\n"
24711"/// \\param __i1\n"
24712"/// A 32-bit integer value used to initialize the upper 32 bits of the\n"
24713"/// result.\n"
24714"/// \\returns An initialized 64-bit integer vector.\n"
24715"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24716"_mm_setr_pi32(int __i0, int __i1)\n"
24717"{\n"
24718" return _mm_set_pi32(__i1, __i0);\n"
24719"}\n"
24720"\n"
24721"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24722"/// the specified 16-bit integer values.\n"
24723"///\n"
24724"/// \\headerfile <x86intrin.h>\n"
24725"///\n"
24726"/// This intrinsic is a utility function and does not correspond to a specific\n"
24727"/// instruction.\n"
24728"///\n"
24729"/// \\param __w0\n"
24730"/// A 16-bit integer value used to initialize bits [15:0] of the result.\n"
24731"/// \\param __w1\n"
24732"/// A 16-bit integer value used to initialize bits [31:16] of the result.\n"
24733"/// \\param __w2\n"
24734"/// A 16-bit integer value used to initialize bits [47:32] of the result.\n"
24735"/// \\param __w3\n"
24736"/// A 16-bit integer value used to initialize bits [63:48] of the result.\n"
24737"/// \\returns An initialized 64-bit integer vector.\n"
24738"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24739"_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)\n"
24740"{\n"
24741" return _mm_set_pi16(__w3, __w2, __w1, __w0);\n"
24742"}\n"
24743"\n"
24744"/// Constructs a 64-bit integer vector, initialized in reverse order with\n"
24745"/// the specified 8-bit integer values.\n"
24746"///\n"
24747"/// \\headerfile <x86intrin.h>\n"
24748"///\n"
24749"/// This intrinsic is a utility function and does not correspond to a specific\n"
24750"/// instruction.\n"
24751"///\n"
24752"/// \\param __b0\n"
24753"/// An 8-bit integer value used to initialize bits [7:0] of the result.\n"
24754"/// \\param __b1\n"
24755"/// An 8-bit integer value used to initialize bits [15:8] of the result.\n"
24756"/// \\param __b2\n"
24757"/// An 8-bit integer value used to initialize bits [23:16] of the result.\n"
24758"/// \\param __b3\n"
24759"/// An 8-bit integer value used to initialize bits [31:24] of the result.\n"
24760"/// \\param __b4\n"
24761"/// An 8-bit integer value used to initialize bits [39:32] of the result.\n"
24762"/// \\param __b5\n"
24763"/// An 8-bit integer value used to initialize bits [47:40] of the result.\n"
24764"/// \\param __b6\n"
24765"/// An 8-bit integer value used to initialize bits [55:48] of the result.\n"
24766"/// \\param __b7\n"
24767"/// An 8-bit integer value used to initialize bits [63:56] of the result.\n"
24768"/// \\returns An initialized 64-bit integer vector.\n"
24769"static __inline__ __m64 __DEFAULT_FN_ATTRS\n"
24770"_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,\n"
24771" char __b6, char __b7)\n"
24772"{\n"
24773" return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n"
24774"}\n"
24775"\n"
24776"#undef __DEFAULT_FN_ATTRS\n"
24777"\n"
24778"/* Aliases for compatibility. */\n"
24779"#define _m_empty _mm_empty\n"
24780"#define _m_from_int _mm_cvtsi32_si64\n"
24781"#define _m_from_int64 _mm_cvtsi64_m64\n"
24782"#define _m_to_int _mm_cvtsi64_si32\n"
24783"#define _m_to_int64 _mm_cvtm64_si64\n"
24784"#define _m_packsswb _mm_packs_pi16\n"
24785"#define _m_packssdw _mm_packs_pi32\n"
24786"#define _m_packuswb _mm_packs_pu16\n"
24787"#define _m_punpckhbw _mm_unpackhi_pi8\n"
24788"#define _m_punpckhwd _mm_unpackhi_pi16\n"
24789"#define _m_punpckhdq _mm_unpackhi_pi32\n"
24790"#define _m_punpcklbw _mm_unpacklo_pi8\n"
24791"#define _m_punpcklwd _mm_unpacklo_pi16\n"
24792"#define _m_punpckldq _mm_unpacklo_pi32\n"
24793"#define _m_paddb _mm_add_pi8\n"
24794"#define _m_paddw _mm_add_pi16\n"
24795"#define _m_paddd _mm_add_pi32\n"
24796"#define _m_paddsb _mm_adds_pi8\n"
24797"#define _m_paddsw _mm_adds_pi16\n"
24798"#define _m_paddusb _mm_adds_pu8\n"
24799"#define _m_paddusw _mm_adds_pu16\n"
24800"#define _m_psubb _mm_sub_pi8\n"
24801"#define _m_psubw _mm_sub_pi16\n"
24802"#define _m_psubd _mm_sub_pi32\n"
24803"#define _m_psubsb _mm_subs_pi8\n"
24804"#define _m_psubsw _mm_subs_pi16\n"
24805"#define _m_psubusb _mm_subs_pu8\n"
24806"#define _m_psubusw _mm_subs_pu16\n"
24807"#define _m_pmaddwd _mm_madd_pi16\n"
24808"#define _m_pmulhw _mm_mulhi_pi16\n"
24809"#define _m_pmullw _mm_mullo_pi16\n"
24810"#define _m_psllw _mm_sll_pi16\n"
24811"#define _m_psllwi _mm_slli_pi16\n"
24812"#define _m_pslld _mm_sll_pi32\n"
24813"#define _m_pslldi _mm_slli_pi32\n"
24814"#define _m_psllq _mm_sll_si64\n"
24815"#define _m_psllqi _mm_slli_si64\n"
24816"#define _m_psraw _mm_sra_pi16\n"
24817"#define _m_psrawi _mm_srai_pi16\n"
24818"#define _m_psrad _mm_sra_pi32\n"
24819"#define _m_psradi _mm_srai_pi32\n"
24820"#define _m_psrlw _mm_srl_pi16\n"
24821"#define _m_psrlwi _mm_srli_pi16\n"
24822"#define _m_psrld _mm_srl_pi32\n"
24823"#define _m_psrldi _mm_srli_pi32\n"
24824"#define _m_psrlq _mm_srl_si64\n"
24825"#define _m_psrlqi _mm_srli_si64\n"
24826"#define _m_pand _mm_and_si64\n"
24827"#define _m_pandn _mm_andnot_si64\n"
24828"#define _m_por _mm_or_si64\n"
24829"#define _m_pxor _mm_xor_si64\n"
24830"#define _m_pcmpeqb _mm_cmpeq_pi8\n"
24831"#define _m_pcmpeqw _mm_cmpeq_pi16\n"
24832"#define _m_pcmpeqd _mm_cmpeq_pi32\n"
24833"#define _m_pcmpgtb _mm_cmpgt_pi8\n"
24834"#define _m_pcmpgtw _mm_cmpgt_pi16\n"
24835"#define _m_pcmpgtd _mm_cmpgt_pi32\n"
24836"\n"
24837"#endif /* __MMINTRIN_H */\n"
24838"\n"
24839"" } ,
24840 { "/builtins/movdirintrin.h" , "/*===------------------------- movdirintrin.h ------------------------------===\n"
24841" *\n"
24842" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24843" * of this software and associated documentation files (the \"Software\"), to deal\n"
24844" * in the Software without restriction, including without limitation the rights\n"
24845" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24846" * copies of the Software, and to permit persons to whom the Software is\n"
24847" * furnished to do so, subject to the following conditions:\n"
24848" *\n"
24849" * The above copyright notice and this permission notice shall be included in\n"
24850" * all copies or substantial portions of the Software.\n"
24851" *\n"
24852" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24853" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24854" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24855" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24856" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24857" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24858" * THE SOFTWARE.\n"
24859" *\n"
24860" *===-----------------------------------------------------------------------===\n"
24861" */\n"
24862"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
24863"#error \"Never use <movdirintrin.h> directly; include <x86intrin.h> instead.\"\n"
24864"#endif\n"
24865"\n"
24866"#ifndef _MOVDIRINTRIN_H\n"
24867"#define _MOVDIRINTRIN_H\n"
24868"\n"
24869"/* Move doubleword as direct store */\n"
24870"static __inline__ void\n"
24871"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24872"_directstoreu_u32 (void *__dst, unsigned int __value)\n"
24873"{\n"
24874" __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);\n"
24875"}\n"
24876"\n"
24877"#ifdef __x86_64__\n"
24878"\n"
24879"/* Move quadword as direct store */\n"
24880"static __inline__ void\n"
24881"__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n"
24882"_directstoreu_u64 (void *__dst, unsigned long __value)\n"
24883"{\n"
24884" __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);\n"
24885"}\n"
24886"\n"
24887"#endif /* __x86_64__ */\n"
24888"\n"
24889"/*\n"
24890" * movdir64b - Move 64 bytes as direct store.\n"
24891" * The destination must be 64 byte aligned, and the store is atomic.\n"
24892" * The source address has no alignment requirement, and the load from\n"
24893" * the source address is not atomic.\n"
24894" */\n"
24895"static __inline__ void\n"
24896"__attribute__((__always_inline__, __nodebug__, __target__(\"movdir64b\")))\n"
24897"_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)\n"
24898"{\n"
24899" __builtin_ia32_movdir64b(__dst, __src);\n"
24900"}\n"
24901"\n"
24902"#endif /* _MOVDIRINTRIN_H */\n"
24903"" } ,
24904 { "/builtins/msa.h" , "/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===\n"
24905" *\n"
24906" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
24907" * of this software and associated documentation files (the \"Software\"), to deal\n"
24908" * in the Software without restriction, including without limitation the rights\n"
24909" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
24910" * copies of the Software, and to permit persons to whom the Software is\n"
24911" * furnished to do so, subject to the following conditions:\n"
24912" *\n"
24913" * The above copyright notice and this permission notice shall be included in\n"
24914" * all copies or substantial portions of the Software.\n"
24915" *\n"
24916" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
24917" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
24918" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
24919" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
24920" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
24921" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
24922" * THE SOFTWARE.\n"
24923" *\n"
24924" *===-----------------------------------------------------------------------===\n"
24925" */\n"
24926"\n"
24927"#ifndef _MSA_H\n"
24928"#define _MSA_H 1\n"
24929"\n"
24930"#if defined(__mips_msa)\n"
24931"typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));\n"
24932"typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));\n"
24933"typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));\n"
24934"typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));\n"
24935"typedef short v8i16 __attribute__((vector_size(16), aligned(16)));\n"
24936"typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));\n"
24937"typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));\n"
24938"typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));\n"
24939"typedef int v4i32 __attribute__((vector_size(16), aligned(16)));\n"
24940"typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));\n"
24941"typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));\n"
24942"typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));\n"
24943"typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));\n"
24944"typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));\n"
24945"typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));\n"
24946"typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));\n"
24947"typedef float v4f32 __attribute__((vector_size(16), aligned(16)));\n"
24948"typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));\n"
24949"typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));\n"
24950"typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));\n"
24951"\n"
24952"#define __msa_sll_b __builtin_msa_sll_b\n"
24953"#define __msa_sll_h __builtin_msa_sll_h\n"
24954"#define __msa_sll_w __builtin_msa_sll_w\n"
24955"#define __msa_sll_d __builtin_msa_sll_d\n"
24956"#define __msa_slli_b __builtin_msa_slli_b\n"
24957"#define __msa_slli_h __builtin_msa_slli_h\n"
24958"#define __msa_slli_w __builtin_msa_slli_w\n"
24959"#define __msa_slli_d __builtin_msa_slli_d\n"
24960"#define __msa_sra_b __builtin_msa_sra_b\n"
24961"#define __msa_sra_h __builtin_msa_sra_h\n"
24962"#define __msa_sra_w __builtin_msa_sra_w\n"
24963"#define __msa_sra_d __builtin_msa_sra_d\n"
24964"#define __msa_srai_b __builtin_msa_srai_b\n"
24965"#define __msa_srai_h __builtin_msa_srai_h\n"
24966"#define __msa_srai_w __builtin_msa_srai_w\n"
24967"#define __msa_srai_d __builtin_msa_srai_d\n"
24968"#define __msa_srar_b __builtin_msa_srar_b\n"
24969"#define __msa_srar_h __builtin_msa_srar_h\n"
24970"#define __msa_srar_w __builtin_msa_srar_w\n"
24971"#define __msa_srar_d __builtin_msa_srar_d\n"
24972"#define __msa_srari_b __builtin_msa_srari_b\n"
24973"#define __msa_srari_h __builtin_msa_srari_h\n"
24974"#define __msa_srari_w __builtin_msa_srari_w\n"
24975"#define __msa_srari_d __builtin_msa_srari_d\n"
24976"#define __msa_srl_b __builtin_msa_srl_b\n"
24977"#define __msa_srl_h __builtin_msa_srl_h\n"
24978"#define __msa_srl_w __builtin_msa_srl_w\n"
24979"#define __msa_srl_d __builtin_msa_srl_d\n"
24980"#define __msa_srli_b __builtin_msa_srli_b\n"
24981"#define __msa_srli_h __builtin_msa_srli_h\n"
24982"#define __msa_srli_w __builtin_msa_srli_w\n"
24983"#define __msa_srli_d __builtin_msa_srli_d\n"
24984"#define __msa_srlr_b __builtin_msa_srlr_b\n"
24985"#define __msa_srlr_h __builtin_msa_srlr_h\n"
24986"#define __msa_srlr_w __builtin_msa_srlr_w\n"
24987"#define __msa_srlr_d __builtin_msa_srlr_d\n"
24988"#define __msa_srlri_b __builtin_msa_srlri_b\n"
24989"#define __msa_srlri_h __builtin_msa_srlri_h\n"
24990"#define __msa_srlri_w __builtin_msa_srlri_w\n"
24991"#define __msa_srlri_d __builtin_msa_srlri_d\n"
24992"#define __msa_bclr_b __builtin_msa_bclr_b\n"
24993"#define __msa_bclr_h __builtin_msa_bclr_h\n"
24994"#define __msa_bclr_w __builtin_msa_bclr_w\n"
24995"#define __msa_bclr_d __builtin_msa_bclr_d\n"
24996"#define __msa_bclri_b __builtin_msa_bclri_b\n"
24997"#define __msa_bclri_h __builtin_msa_bclri_h\n"
24998"#define __msa_bclri_w __builtin_msa_bclri_w\n"
24999"#define __msa_bclri_d __builtin_msa_bclri_d\n"
25000"#define __msa_bset_b __builtin_msa_bset_b\n"
25001"#define __msa_bset_h __builtin_msa_bset_h\n"
25002"#define __msa_bset_w __builtin_msa_bset_w\n"
25003"#define __msa_bset_d __builtin_msa_bset_d\n"
25004"#define __msa_bseti_b __builtin_msa_bseti_b\n"
25005"#define __msa_bseti_h __builtin_msa_bseti_h\n"
25006"#define __msa_bseti_w __builtin_msa_bseti_w\n"
25007"#define __msa_bseti_d __builtin_msa_bseti_d\n"
25008"#define __msa_bneg_b __builtin_msa_bneg_b\n"
25009"#define __msa_bneg_h __builtin_msa_bneg_h\n"
25010"#define __msa_bneg_w __builtin_msa_bneg_w\n"
25011"#define __msa_bneg_d __builtin_msa_bneg_d\n"
25012"#define __msa_bnegi_b __builtin_msa_bnegi_b\n"
25013"#define __msa_bnegi_h __builtin_msa_bnegi_h\n"
25014"#define __msa_bnegi_w __builtin_msa_bnegi_w\n"
25015"#define __msa_bnegi_d __builtin_msa_bnegi_d\n"
25016"#define __msa_binsl_b __builtin_msa_binsl_b\n"
25017"#define __msa_binsl_h __builtin_msa_binsl_h\n"
25018"#define __msa_binsl_w __builtin_msa_binsl_w\n"
25019"#define __msa_binsl_d __builtin_msa_binsl_d\n"
25020"#define __msa_binsli_b __builtin_msa_binsli_b\n"
25021"#define __msa_binsli_h __builtin_msa_binsli_h\n"
25022"#define __msa_binsli_w __builtin_msa_binsli_w\n"
25023"#define __msa_binsli_d __builtin_msa_binsli_d\n"
25024"#define __msa_binsr_b __builtin_msa_binsr_b\n"
25025"#define __msa_binsr_h __builtin_msa_binsr_h\n"
25026"#define __msa_binsr_w __builtin_msa_binsr_w\n"
25027"#define __msa_binsr_d __builtin_msa_binsr_d\n"
25028"#define __msa_binsri_b __builtin_msa_binsri_b\n"
25029"#define __msa_binsri_h __builtin_msa_binsri_h\n"
25030"#define __msa_binsri_w __builtin_msa_binsri_w\n"
25031"#define __msa_binsri_d __builtin_msa_binsri_d\n"
25032"#define __msa_addv_b __builtin_msa_addv_b\n"
25033"#define __msa_addv_h __builtin_msa_addv_h\n"
25034"#define __msa_addv_w __builtin_msa_addv_w\n"
25035"#define __msa_addv_d __builtin_msa_addv_d\n"
25036"#define __msa_addvi_b __builtin_msa_addvi_b\n"
25037"#define __msa_addvi_h __builtin_msa_addvi_h\n"
25038"#define __msa_addvi_w __builtin_msa_addvi_w\n"
25039"#define __msa_addvi_d __builtin_msa_addvi_d\n"
25040"#define __msa_subv_b __builtin_msa_subv_b\n"
25041"#define __msa_subv_h __builtin_msa_subv_h\n"
25042"#define __msa_subv_w __builtin_msa_subv_w\n"
25043"#define __msa_subv_d __builtin_msa_subv_d\n"
25044"#define __msa_subvi_b __builtin_msa_subvi_b\n"
25045"#define __msa_subvi_h __builtin_msa_subvi_h\n"
25046"#define __msa_subvi_w __builtin_msa_subvi_w\n"
25047"#define __msa_subvi_d __builtin_msa_subvi_d\n"
25048"#define __msa_max_s_b __builtin_msa_max_s_b\n"
25049"#define __msa_max_s_h __builtin_msa_max_s_h\n"
25050"#define __msa_max_s_w __builtin_msa_max_s_w\n"
25051"#define __msa_max_s_d __builtin_msa_max_s_d\n"
25052"#define __msa_maxi_s_b __builtin_msa_maxi_s_b\n"
25053"#define __msa_maxi_s_h __builtin_msa_maxi_s_h\n"
25054"#define __msa_maxi_s_w __builtin_msa_maxi_s_w\n"
25055"#define __msa_maxi_s_d __builtin_msa_maxi_s_d\n"
25056"#define __msa_max_u_b __builtin_msa_max_u_b\n"
25057"#define __msa_max_u_h __builtin_msa_max_u_h\n"
25058"#define __msa_max_u_w __builtin_msa_max_u_w\n"
25059"#define __msa_max_u_d __builtin_msa_max_u_d\n"
25060"#define __msa_maxi_u_b __builtin_msa_maxi_u_b\n"
25061"#define __msa_maxi_u_h __builtin_msa_maxi_u_h\n"
25062"#define __msa_maxi_u_w __builtin_msa_maxi_u_w\n"
25063"#define __msa_maxi_u_d __builtin_msa_maxi_u_d\n"
25064"#define __msa_min_s_b __builtin_msa_min_s_b\n"
25065"#define __msa_min_s_h __builtin_msa_min_s_h\n"
25066"#define __msa_min_s_w __builtin_msa_min_s_w\n"
25067"#define __msa_min_s_d __builtin_msa_min_s_d\n"
25068"#define __msa_mini_s_b __builtin_msa_mini_s_b\n"
25069"#define __msa_mini_s_h __builtin_msa_mini_s_h\n"
25070"#define __msa_mini_s_w __builtin_msa_mini_s_w\n"
25071"#define __msa_mini_s_d __builtin_msa_mini_s_d\n"
25072"#define __msa_min_u_b __builtin_msa_min_u_b\n"
25073"#define __msa_min_u_h __builtin_msa_min_u_h\n"
25074"#define __msa_min_u_w __builtin_msa_min_u_w\n"
25075"#define __msa_min_u_d __builtin_msa_min_u_d\n"
25076"#define __msa_mini_u_b __builtin_msa_mini_u_b\n"
25077"#define __msa_mini_u_h __builtin_msa_mini_u_h\n"
25078"#define __msa_mini_u_w __builtin_msa_mini_u_w\n"
25079"#define __msa_mini_u_d __builtin_msa_mini_u_d\n"
25080"#define __msa_max_a_b __builtin_msa_max_a_b\n"
25081"#define __msa_max_a_h __builtin_msa_max_a_h\n"
25082"#define __msa_max_a_w __builtin_msa_max_a_w\n"
25083"#define __msa_max_a_d __builtin_msa_max_a_d\n"
25084"#define __msa_min_a_b __builtin_msa_min_a_b\n"
25085"#define __msa_min_a_h __builtin_msa_min_a_h\n"
25086"#define __msa_min_a_w __builtin_msa_min_a_w\n"
25087"#define __msa_min_a_d __builtin_msa_min_a_d\n"
25088"#define __msa_ceq_b __builtin_msa_ceq_b\n"
25089"#define __msa_ceq_h __builtin_msa_ceq_h\n"
25090"#define __msa_ceq_w __builtin_msa_ceq_w\n"
25091"#define __msa_ceq_d __builtin_msa_ceq_d\n"
25092"#define __msa_ceqi_b __builtin_msa_ceqi_b\n"
25093"#define __msa_ceqi_h __builtin_msa_ceqi_h\n"
25094"#define __msa_ceqi_w __builtin_msa_ceqi_w\n"
25095"#define __msa_ceqi_d __builtin_msa_ceqi_d\n"
25096"#define __msa_clt_s_b __builtin_msa_clt_s_b\n"
25097"#define __msa_clt_s_h __builtin_msa_clt_s_h\n"
25098"#define __msa_clt_s_w __builtin_msa_clt_s_w\n"
25099"#define __msa_clt_s_d __builtin_msa_clt_s_d\n"
25100"#define __msa_clti_s_b __builtin_msa_clti_s_b\n"
25101"#define __msa_clti_s_h __builtin_msa_clti_s_h\n"
25102"#define __msa_clti_s_w __builtin_msa_clti_s_w\n"
25103"#define __msa_clti_s_d __builtin_msa_clti_s_d\n"
25104"#define __msa_clt_u_b __builtin_msa_clt_u_b\n"
25105"#define __msa_clt_u_h __builtin_msa_clt_u_h\n"
25106"#define __msa_clt_u_w __builtin_msa_clt_u_w\n"
25107"#define __msa_clt_u_d __builtin_msa_clt_u_d\n"
25108"#define __msa_clti_u_b __builtin_msa_clti_u_b\n"
25109"#define __msa_clti_u_h __builtin_msa_clti_u_h\n"
25110"#define __msa_clti_u_w __builtin_msa_clti_u_w\n"
25111"#define __msa_clti_u_d __builtin_msa_clti_u_d\n"
25112"#define __msa_cle_s_b __builtin_msa_cle_s_b\n"
25113"#define __msa_cle_s_h __builtin_msa_cle_s_h\n"
25114"#define __msa_cle_s_w __builtin_msa_cle_s_w\n"
25115"#define __msa_cle_s_d __builtin_msa_cle_s_d\n"
25116"#define __msa_clei_s_b __builtin_msa_clei_s_b\n"
25117"#define __msa_clei_s_h __builtin_msa_clei_s_h\n"
25118"#define __msa_clei_s_w __builtin_msa_clei_s_w\n"
25119"#define __msa_clei_s_d __builtin_msa_clei_s_d\n"
25120"#define __msa_cle_u_b __builtin_msa_cle_u_b\n"
25121"#define __msa_cle_u_h __builtin_msa_cle_u_h\n"
25122"#define __msa_cle_u_w __builtin_msa_cle_u_w\n"
25123"#define __msa_cle_u_d __builtin_msa_cle_u_d\n"
25124"#define __msa_clei_u_b __builtin_msa_clei_u_b\n"
25125"#define __msa_clei_u_h __builtin_msa_clei_u_h\n"
25126"#define __msa_clei_u_w __builtin_msa_clei_u_w\n"
25127"#define __msa_clei_u_d __builtin_msa_clei_u_d\n"
25128"#define __msa_ld_b __builtin_msa_ld_b\n"
25129"#define __msa_ld_h __builtin_msa_ld_h\n"
25130"#define __msa_ld_w __builtin_msa_ld_w\n"
25131"#define __msa_ld_d __builtin_msa_ld_d\n"
25132"#define __msa_st_b __builtin_msa_st_b\n"
25133"#define __msa_st_h __builtin_msa_st_h\n"
25134"#define __msa_st_w __builtin_msa_st_w\n"
25135"#define __msa_st_d __builtin_msa_st_d\n"
25136"#define __msa_sat_s_b __builtin_msa_sat_s_b\n"
25137"#define __msa_sat_s_h __builtin_msa_sat_s_h\n"
25138"#define __msa_sat_s_w __builtin_msa_sat_s_w\n"
25139"#define __msa_sat_s_d __builtin_msa_sat_s_d\n"
25140"#define __msa_sat_u_b __builtin_msa_sat_u_b\n"
25141"#define __msa_sat_u_h __builtin_msa_sat_u_h\n"
25142"#define __msa_sat_u_w __builtin_msa_sat_u_w\n"
25143"#define __msa_sat_u_d __builtin_msa_sat_u_d\n"
25144"#define __msa_add_a_b __builtin_msa_add_a_b\n"
25145"#define __msa_add_a_h __builtin_msa_add_a_h\n"
25146"#define __msa_add_a_w __builtin_msa_add_a_w\n"
25147"#define __msa_add_a_d __builtin_msa_add_a_d\n"
25148"#define __msa_adds_a_b __builtin_msa_adds_a_b\n"
25149"#define __msa_adds_a_h __builtin_msa_adds_a_h\n"
25150"#define __msa_adds_a_w __builtin_msa_adds_a_w\n"
25151"#define __msa_adds_a_d __builtin_msa_adds_a_d\n"
25152"#define __msa_adds_s_b __builtin_msa_adds_s_b\n"
25153"#define __msa_adds_s_h __builtin_msa_adds_s_h\n"
25154"#define __msa_adds_s_w __builtin_msa_adds_s_w\n"
25155"#define __msa_adds_s_d __builtin_msa_adds_s_d\n"
25156"#define __msa_adds_u_b __builtin_msa_adds_u_b\n"
25157"#define __msa_adds_u_h __builtin_msa_adds_u_h\n"
25158"#define __msa_adds_u_w __builtin_msa_adds_u_w\n"
25159"#define __msa_adds_u_d __builtin_msa_adds_u_d\n"
25160"#define __msa_ave_s_b __builtin_msa_ave_s_b\n"
25161"#define __msa_ave_s_h __builtin_msa_ave_s_h\n"
25162"#define __msa_ave_s_w __builtin_msa_ave_s_w\n"
25163"#define __msa_ave_s_d __builtin_msa_ave_s_d\n"
25164"#define __msa_ave_u_b __builtin_msa_ave_u_b\n"
25165"#define __msa_ave_u_h __builtin_msa_ave_u_h\n"
25166"#define __msa_ave_u_w __builtin_msa_ave_u_w\n"
25167"#define __msa_ave_u_d __builtin_msa_ave_u_d\n"
25168"#define __msa_aver_s_b __builtin_msa_aver_s_b\n"
25169"#define __msa_aver_s_h __builtin_msa_aver_s_h\n"
25170"#define __msa_aver_s_w __builtin_msa_aver_s_w\n"
25171"#define __msa_aver_s_d __builtin_msa_aver_s_d\n"
25172"#define __msa_aver_u_b __builtin_msa_aver_u_b\n"
25173"#define __msa_aver_u_h __builtin_msa_aver_u_h\n"
25174"#define __msa_aver_u_w __builtin_msa_aver_u_w\n"
25175"#define __msa_aver_u_d __builtin_msa_aver_u_d\n"
25176"#define __msa_subs_s_b __builtin_msa_subs_s_b\n"
25177"#define __msa_subs_s_h __builtin_msa_subs_s_h\n"
25178"#define __msa_subs_s_w __builtin_msa_subs_s_w\n"
25179"#define __msa_subs_s_d __builtin_msa_subs_s_d\n"
25180"#define __msa_subs_u_b __builtin_msa_subs_u_b\n"
25181"#define __msa_subs_u_h __builtin_msa_subs_u_h\n"
25182"#define __msa_subs_u_w __builtin_msa_subs_u_w\n"
25183"#define __msa_subs_u_d __builtin_msa_subs_u_d\n"
25184"#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b\n"
25185"#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h\n"
25186"#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w\n"
25187"#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d\n"
25188"#define __msa_subsus_u_b __builtin_msa_subsus_u_b\n"
25189"#define __msa_subsus_u_h __builtin_msa_subsus_u_h\n"
25190"#define __msa_subsus_u_w __builtin_msa_subsus_u_w\n"
25191"#define __msa_subsus_u_d __builtin_msa_subsus_u_d\n"
25192"#define __msa_asub_s_b __builtin_msa_asub_s_b\n"
25193"#define __msa_asub_s_h __builtin_msa_asub_s_h\n"
25194"#define __msa_asub_s_w __builtin_msa_asub_s_w\n"
25195"#define __msa_asub_s_d __builtin_msa_asub_s_d\n"
25196"#define __msa_asub_u_b __builtin_msa_asub_u_b\n"
25197"#define __msa_asub_u_h __builtin_msa_asub_u_h\n"
25198"#define __msa_asub_u_w __builtin_msa_asub_u_w\n"
25199"#define __msa_asub_u_d __builtin_msa_asub_u_d\n"
25200"#define __msa_mulv_b __builtin_msa_mulv_b\n"
25201"#define __msa_mulv_h __builtin_msa_mulv_h\n"
25202"#define __msa_mulv_w __builtin_msa_mulv_w\n"
25203"#define __msa_mulv_d __builtin_msa_mulv_d\n"
25204"#define __msa_maddv_b __builtin_msa_maddv_b\n"
25205"#define __msa_maddv_h __builtin_msa_maddv_h\n"
25206"#define __msa_maddv_w __builtin_msa_maddv_w\n"
25207"#define __msa_maddv_d __builtin_msa_maddv_d\n"
25208"#define __msa_msubv_b __builtin_msa_msubv_b\n"
25209"#define __msa_msubv_h __builtin_msa_msubv_h\n"
25210"#define __msa_msubv_w __builtin_msa_msubv_w\n"
25211"#define __msa_msubv_d __builtin_msa_msubv_d\n"
25212"#define __msa_div_s_b __builtin_msa_div_s_b\n"
25213"#define __msa_div_s_h __builtin_msa_div_s_h\n"
25214"#define __msa_div_s_w __builtin_msa_div_s_w\n"
25215"#define __msa_div_s_d __builtin_msa_div_s_d\n"
25216"#define __msa_div_u_b __builtin_msa_div_u_b\n"
25217"#define __msa_div_u_h __builtin_msa_div_u_h\n"
25218"#define __msa_div_u_w __builtin_msa_div_u_w\n"
25219"#define __msa_div_u_d __builtin_msa_div_u_d\n"
25220"#define __msa_hadd_s_h __builtin_msa_hadd_s_h\n"
25221"#define __msa_hadd_s_w __builtin_msa_hadd_s_w\n"
25222"#define __msa_hadd_s_d __builtin_msa_hadd_s_d\n"
25223"#define __msa_hadd_u_h __builtin_msa_hadd_u_h\n"
25224"#define __msa_hadd_u_w __builtin_msa_hadd_u_w\n"
25225"#define __msa_hadd_u_d __builtin_msa_hadd_u_d\n"
25226"#define __msa_hsub_s_h __builtin_msa_hsub_s_h\n"
25227"#define __msa_hsub_s_w __builtin_msa_hsub_s_w\n"
25228"#define __msa_hsub_s_d __builtin_msa_hsub_s_d\n"
25229"#define __msa_hsub_u_h __builtin_msa_hsub_u_h\n"
25230"#define __msa_hsub_u_w __builtin_msa_hsub_u_w\n"
25231"#define __msa_hsub_u_d __builtin_msa_hsub_u_d\n"
25232"#define __msa_mod_s_b __builtin_msa_mod_s_b\n"
25233"#define __msa_mod_s_h __builtin_msa_mod_s_h\n"
25234"#define __msa_mod_s_w __builtin_msa_mod_s_w\n"
25235"#define __msa_mod_s_d __builtin_msa_mod_s_d\n"
25236"#define __msa_mod_u_b __builtin_msa_mod_u_b\n"
25237"#define __msa_mod_u_h __builtin_msa_mod_u_h\n"
25238"#define __msa_mod_u_w __builtin_msa_mod_u_w\n"
25239"#define __msa_mod_u_d __builtin_msa_mod_u_d\n"
25240"#define __msa_dotp_s_h __builtin_msa_dotp_s_h\n"
25241"#define __msa_dotp_s_w __builtin_msa_dotp_s_w\n"
25242"#define __msa_dotp_s_d __builtin_msa_dotp_s_d\n"
25243"#define __msa_dotp_u_h __builtin_msa_dotp_u_h\n"
25244"#define __msa_dotp_u_w __builtin_msa_dotp_u_w\n"
25245"#define __msa_dotp_u_d __builtin_msa_dotp_u_d\n"
25246"#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h\n"
25247"#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w\n"
25248"#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d\n"
25249"#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h\n"
25250"#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w\n"
25251"#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d\n"
25252"#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h\n"
25253"#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w\n"
25254"#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d\n"
25255"#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h\n"
25256"#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w\n"
25257"#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d\n"
25258"#define __msa_sld_b __builtin_msa_sld_b\n"
25259"#define __msa_sld_h __builtin_msa_sld_h\n"
25260"#define __msa_sld_w __builtin_msa_sld_w\n"
25261"#define __msa_sld_d __builtin_msa_sld_d\n"
25262"#define __msa_sldi_b __builtin_msa_sldi_b\n"
25263"#define __msa_sldi_h __builtin_msa_sldi_h\n"
25264"#define __msa_sldi_w __builtin_msa_sldi_w\n"
25265"#define __msa_sldi_d __builtin_msa_sldi_d\n"
25266"#define __msa_splat_b __builtin_msa_splat_b\n"
25267"#define __msa_splat_h __builtin_msa_splat_h\n"
25268"#define __msa_splat_w __builtin_msa_splat_w\n"
25269"#define __msa_splat_d __builtin_msa_splat_d\n"
25270"#define __msa_splati_b __builtin_msa_splati_b\n"
25271"#define __msa_splati_h __builtin_msa_splati_h\n"
25272"#define __msa_splati_w __builtin_msa_splati_w\n"
25273"#define __msa_splati_d __builtin_msa_splati_d\n"
25274"#define __msa_pckev_b __builtin_msa_pckev_b\n"
25275"#define __msa_pckev_h __builtin_msa_pckev_h\n"
25276"#define __msa_pckev_w __builtin_msa_pckev_w\n"
25277"#define __msa_pckev_d __builtin_msa_pckev_d\n"
25278"#define __msa_pckod_b __builtin_msa_pckod_b\n"
25279"#define __msa_pckod_h __builtin_msa_pckod_h\n"
25280"#define __msa_pckod_w __builtin_msa_pckod_w\n"
25281"#define __msa_pckod_d __builtin_msa_pckod_d\n"
25282"#define __msa_ilvl_b __builtin_msa_ilvl_b\n"
25283"#define __msa_ilvl_h __builtin_msa_ilvl_h\n"
25284"#define __msa_ilvl_w __builtin_msa_ilvl_w\n"
25285"#define __msa_ilvl_d __builtin_msa_ilvl_d\n"
25286"#define __msa_ilvr_b __builtin_msa_ilvr_b\n"
25287"#define __msa_ilvr_h __builtin_msa_ilvr_h\n"
25288"#define __msa_ilvr_w __builtin_msa_ilvr_w\n"
25289"#define __msa_ilvr_d __builtin_msa_ilvr_d\n"
25290"#define __msa_ilvev_b __builtin_msa_ilvev_b\n"
25291"#define __msa_ilvev_h __builtin_msa_ilvev_h\n"
25292"#define __msa_ilvev_w __builtin_msa_ilvev_w\n"
25293"#define __msa_ilvev_d __builtin_msa_ilvev_d\n"
25294"#define __msa_ilvod_b __builtin_msa_ilvod_b\n"
25295"#define __msa_ilvod_h __builtin_msa_ilvod_h\n"
25296"#define __msa_ilvod_w __builtin_msa_ilvod_w\n"
25297"#define __msa_ilvod_d __builtin_msa_ilvod_d\n"
25298"#define __msa_vshf_b __builtin_msa_vshf_b\n"
25299"#define __msa_vshf_h __builtin_msa_vshf_h\n"
25300"#define __msa_vshf_w __builtin_msa_vshf_w\n"
25301"#define __msa_vshf_d __builtin_msa_vshf_d\n"
25302"#define __msa_and_v __builtin_msa_and_v\n"
25303"#define __msa_andi_b __builtin_msa_andi_b\n"
25304"#define __msa_or_v __builtin_msa_or_v\n"
25305"#define __msa_ori_b __builtin_msa_ori_b\n"
25306"#define __msa_nor_v __builtin_msa_nor_v\n"
25307"#define __msa_nori_b __builtin_msa_nori_b\n"
25308"#define __msa_xor_v __builtin_msa_xor_v\n"
25309"#define __msa_xori_b __builtin_msa_xori_b\n"
25310"#define __msa_bmnz_v __builtin_msa_bmnz_v\n"
25311"#define __msa_bmnzi_b __builtin_msa_bmnzi_b\n"
25312"#define __msa_bmz_v __builtin_msa_bmz_v\n"
25313"#define __msa_bmzi_b __builtin_msa_bmzi_b\n"
25314"#define __msa_bsel_v __builtin_msa_bsel_v\n"
25315"#define __msa_bseli_b __builtin_msa_bseli_b\n"
25316"#define __msa_shf_b __builtin_msa_shf_b\n"
25317"#define __msa_shf_h __builtin_msa_shf_h\n"
25318"#define __msa_shf_w __builtin_msa_shf_w\n"
25319"#define __msa_test_bnz_v __builtin_msa_bnz_v\n"
25320"#define __msa_test_bz_v __builtin_msa_bz_v\n"
25321"#define __msa_fill_b __builtin_msa_fill_b\n"
25322"#define __msa_fill_h __builtin_msa_fill_h\n"
25323"#define __msa_fill_w __builtin_msa_fill_w\n"
25324"#define __msa_fill_d __builtin_msa_fill_d\n"
25325"#define __msa_pcnt_b __builtin_msa_pcnt_b\n"
25326"#define __msa_pcnt_h __builtin_msa_pcnt_h\n"
25327"#define __msa_pcnt_w __builtin_msa_pcnt_w\n"
25328"#define __msa_pcnt_d __builtin_msa_pcnt_d\n"
25329"#define __msa_nloc_b __builtin_msa_nloc_b\n"
25330"#define __msa_nloc_h __builtin_msa_nloc_h\n"
25331"#define __msa_nloc_w __builtin_msa_nloc_w\n"
25332"#define __msa_nloc_d __builtin_msa_nloc_d\n"
25333"#define __msa_nlzc_b __builtin_msa_nlzc_b\n"
25334"#define __msa_nlzc_h __builtin_msa_nlzc_h\n"
25335"#define __msa_nlzc_w __builtin_msa_nlzc_w\n"
25336"#define __msa_nlzc_d __builtin_msa_nlzc_d\n"
25337"#define __msa_copy_s_b __builtin_msa_copy_s_b\n"
25338"#define __msa_copy_s_h __builtin_msa_copy_s_h\n"
25339"#define __msa_copy_s_w __builtin_msa_copy_s_w\n"
25340"#define __msa_copy_s_d __builtin_msa_copy_s_d\n"
25341"#define __msa_copy_u_b __builtin_msa_copy_u_b\n"
25342"#define __msa_copy_u_h __builtin_msa_copy_u_h\n"
25343"#define __msa_copy_u_w __builtin_msa_copy_u_w\n"
25344"#define __msa_copy_u_d __builtin_msa_copy_u_d\n"
25345"#define __msa_insert_b __builtin_msa_insert_b\n"
25346"#define __msa_insert_h __builtin_msa_insert_h\n"
25347"#define __msa_insert_w __builtin_msa_insert_w\n"
25348"#define __msa_insert_d __builtin_msa_insert_d\n"
25349"#define __msa_insve_b __builtin_msa_insve_b\n"
25350"#define __msa_insve_h __builtin_msa_insve_h\n"
25351"#define __msa_insve_w __builtin_msa_insve_w\n"
25352"#define __msa_insve_d __builtin_msa_insve_d\n"
25353"#define __msa_test_bnz_b __builtin_msa_bnz_b\n"
25354"#define __msa_test_bnz_h __builtin_msa_bnz_h\n"
25355"#define __msa_test_bnz_w __builtin_msa_bnz_w\n"
25356"#define __msa_test_bnz_d __builtin_msa_bnz_d\n"
25357"#define __msa_test_bz_b __builtin_msa_bz_b\n"
25358"#define __msa_test_bz_h __builtin_msa_bz_h\n"
25359"#define __msa_test_bz_w __builtin_msa_bz_w\n"
25360"#define __msa_test_bz_d __builtin_msa_bz_d\n"
25361"#define __msa_ldi_b __builtin_msa_ldi_b\n"
25362"#define __msa_ldi_h __builtin_msa_ldi_h\n"
25363"#define __msa_ldi_w __builtin_msa_ldi_w\n"
25364"#define __msa_ldi_d __builtin_msa_ldi_d\n"
25365"#define __msa_fcaf_w __builtin_msa_fcaf_w\n"
25366"#define __msa_fcaf_d __builtin_msa_fcaf_d\n"
25367"#define __msa_fcor_w __builtin_msa_fcor_w\n"
25368"#define __msa_fcor_d __builtin_msa_fcor_d\n"
25369"#define __msa_fcun_w __builtin_msa_fcun_w\n"
25370"#define __msa_fcun_d __builtin_msa_fcun_d\n"
25371"#define __msa_fcune_w __builtin_msa_fcune_w\n"
25372"#define __msa_fcune_d __builtin_msa_fcune_d\n"
25373"#define __msa_fcueq_w __builtin_msa_fcueq_w\n"
25374"#define __msa_fcueq_d __builtin_msa_fcueq_d\n"
25375"#define __msa_fceq_w __builtin_msa_fceq_w\n"
25376"#define __msa_fceq_d __builtin_msa_fceq_d\n"
25377"#define __msa_fcne_w __builtin_msa_fcne_w\n"
25378"#define __msa_fcne_d __builtin_msa_fcne_d\n"
25379"#define __msa_fclt_w __builtin_msa_fclt_w\n"
25380"#define __msa_fclt_d __builtin_msa_fclt_d\n"
25381"#define __msa_fcult_w __builtin_msa_fcult_w\n"
25382"#define __msa_fcult_d __builtin_msa_fcult_d\n"
25383"#define __msa_fcle_w __builtin_msa_fcle_w\n"
25384"#define __msa_fcle_d __builtin_msa_fcle_d\n"
25385"#define __msa_fcule_w __builtin_msa_fcule_w\n"
25386"#define __msa_fcule_d __builtin_msa_fcule_d\n"
25387"#define __msa_fsaf_w __builtin_msa_fsaf_w\n"
25388"#define __msa_fsaf_d __builtin_msa_fsaf_d\n"
25389"#define __msa_fsor_w __builtin_msa_fsor_w\n"
25390"#define __msa_fsor_d __builtin_msa_fsor_d\n"
25391"#define __msa_fsun_w __builtin_msa_fsun_w\n"
25392"#define __msa_fsun_d __builtin_msa_fsun_d\n"
25393"#define __msa_fsune_w __builtin_msa_fsune_w\n"
25394"#define __msa_fsune_d __builtin_msa_fsune_d\n"
25395"#define __msa_fsueq_w __builtin_msa_fsueq_w\n"
25396"#define __msa_fsueq_d __builtin_msa_fsueq_d\n"
25397"#define __msa_fseq_w __builtin_msa_fseq_w\n"
25398"#define __msa_fseq_d __builtin_msa_fseq_d\n"
25399"#define __msa_fsne_w __builtin_msa_fsne_w\n"
25400"#define __msa_fsne_d __builtin_msa_fsne_d\n"
25401"#define __msa_fslt_w __builtin_msa_fslt_w\n"
25402"#define __msa_fslt_d __builtin_msa_fslt_d\n"
25403"#define __msa_fsult_w __builtin_msa_fsult_w\n"
25404"#define __msa_fsult_d __builtin_msa_fsult_d\n"
25405"#define __msa_fsle_w __builtin_msa_fsle_w\n"
25406"#define __msa_fsle_d __builtin_msa_fsle_d\n"
25407"#define __msa_fsule_w __builtin_msa_fsule_w\n"
25408"#define __msa_fsule_d __builtin_msa_fsule_d\n"
25409"#define __msa_fadd_w __builtin_msa_fadd_w\n"
25410"#define __msa_fadd_d __builtin_msa_fadd_d\n"
25411"#define __msa_fsub_w __builtin_msa_fsub_w\n"
25412"#define __msa_fsub_d __builtin_msa_fsub_d\n"
25413"#define __msa_fmul_w __builtin_msa_fmul_w\n"
25414"#define __msa_fmul_d __builtin_msa_fmul_d\n"
25415"#define __msa_fdiv_w __builtin_msa_fdiv_w\n"
25416"#define __msa_fdiv_d __builtin_msa_fdiv_d\n"
25417"#define __msa_fmadd_w __builtin_msa_fmadd_w\n"
25418"#define __msa_fmadd_d __builtin_msa_fmadd_d\n"
25419"#define __msa_fmsub_w __builtin_msa_fmsub_w\n"
25420"#define __msa_fmsub_d __builtin_msa_fmsub_d\n"
25421"#define __msa_fexp2_w __builtin_msa_fexp2_w\n"
25422"#define __msa_fexp2_d __builtin_msa_fexp2_d\n"
25423"#define __msa_fexdo_h __builtin_msa_fexdo_h\n"
25424"#define __msa_fexdo_w __builtin_msa_fexdo_w\n"
25425"#define __msa_ftq_h __builtin_msa_ftq_h\n"
25426"#define __msa_ftq_w __builtin_msa_ftq_w\n"
25427"#define __msa_fmin_w __builtin_msa_fmin_w\n"
25428"#define __msa_fmin_d __builtin_msa_fmin_d\n"
25429"#define __msa_fmin_a_w __builtin_msa_fmin_a_w\n"
25430"#define __msa_fmin_a_d __builtin_msa_fmin_a_d\n"
25431"#define __msa_fmax_w __builtin_msa_fmax_w\n"
25432"#define __msa_fmax_d __builtin_msa_fmax_d\n"
25433"#define __msa_fmax_a_w __builtin_msa_fmax_a_w\n"
25434"#define __msa_fmax_a_d __builtin_msa_fmax_a_d\n"
25435"#define __msa_mul_q_h __builtin_msa_mul_q_h\n"
25436"#define __msa_mul_q_w __builtin_msa_mul_q_w\n"
25437"#define __msa_mulr_q_h __builtin_msa_mulr_q_h\n"
25438"#define __msa_mulr_q_w __builtin_msa_mulr_q_w\n"
25439"#define __msa_madd_q_h __builtin_msa_madd_q_h\n"
25440"#define __msa_madd_q_w __builtin_msa_madd_q_w\n"
25441"#define __msa_maddr_q_h __builtin_msa_maddr_q_h\n"
25442"#define __msa_maddr_q_w __builtin_msa_maddr_q_w\n"
25443"#define __msa_msub_q_h __builtin_msa_msub_q_h\n"
25444"#define __msa_msub_q_w __builtin_msa_msub_q_w\n"
25445"#define __msa_msubr_q_h __builtin_msa_msubr_q_h\n"
25446"#define __msa_msubr_q_w __builtin_msa_msubr_q_w\n"
25447"#define __msa_fclass_w __builtin_msa_fclass_w\n"
25448"#define __msa_fclass_d __builtin_msa_fclass_d\n"
25449"#define __msa_fsqrt_w __builtin_msa_fsqrt_w\n"
25450"#define __msa_fsqrt_d __builtin_msa_fsqrt_d\n"
25451"#define __msa_frcp_w __builtin_msa_frcp_w\n"
25452"#define __msa_frcp_d __builtin_msa_frcp_d\n"
25453"#define __msa_frint_w __builtin_msa_frint_w\n"
25454"#define __msa_frint_d __builtin_msa_frint_d\n"
25455"#define __msa_frsqrt_w __builtin_msa_frsqrt_w\n"
25456"#define __msa_frsqrt_d __builtin_msa_frsqrt_d\n"
25457"#define __msa_flog2_w __builtin_msa_flog2_w\n"
25458"#define __msa_flog2_d __builtin_msa_flog2_d\n"
25459"#define __msa_fexupl_w __builtin_msa_fexupl_w\n"
25460"#define __msa_fexupl_d __builtin_msa_fexupl_d\n"
25461"#define __msa_fexupr_w __builtin_msa_fexupr_w\n"
25462"#define __msa_fexupr_d __builtin_msa_fexupr_d\n"
25463"#define __msa_ffql_w __builtin_msa_ffql_w\n"
25464"#define __msa_ffql_d __builtin_msa_ffql_d\n"
25465"#define __msa_ffqr_w __builtin_msa_ffqr_w\n"
25466"#define __msa_ffqr_d __builtin_msa_ffqr_d\n"
25467"#define __msa_ftint_s_w __builtin_msa_ftint_s_w\n"
25468"#define __msa_ftint_s_d __builtin_msa_ftint_s_d\n"
25469"#define __msa_ftint_u_w __builtin_msa_ftint_u_w\n"
25470"#define __msa_ftint_u_d __builtin_msa_ftint_u_d\n"
25471"#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w\n"
25472"#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d\n"
25473"#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w\n"
25474"#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d\n"
25475"#define __msa_ffint_s_w __builtin_msa_ffint_s_w\n"
25476"#define __msa_ffint_s_d __builtin_msa_ffint_s_d\n"
25477"#define __msa_ffint_u_w __builtin_msa_ffint_u_w\n"
25478"#define __msa_ffint_u_d __builtin_msa_ffint_u_d\n"
25479"#define __msa_cfcmsa __builtin_msa_cfcmsa\n"
25480"#define __msa_move_v __builtin_msa_move_v\n"
25481"#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float\n"
25482"#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double\n"
25483"#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float\n"
25484"#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double\n"
25485"#endif /* defined(__mips_msa) */\n"
25486"#endif /* _MSA_H */\n"
25487"" } ,
25488 { "/builtins/mwaitxintrin.h" , "/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===\n"
25489" *\n"
25490" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25491" * of this software and associated documentation files (the \"Software\"), to deal\n"
25492" * in the Software without restriction, including without limitation the rights\n"
25493" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25494" * copies of the Software, and to permit persons to whom the Software is\n"
25495" * furnished to do so, subject to the following conditions:\n"
25496" *\n"
25497" * The above copyright notice and this permission notice shall be included in\n"
25498" * all copies or substantial portions of the Software.\n"
25499" *\n"
25500" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25501" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25502" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25503" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25504" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25505" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25506" * THE SOFTWARE.\n"
25507" *\n"
25508" *===-----------------------------------------------------------------------===\n"
25509" */\n"
25510"\n"
25511"#ifndef __X86INTRIN_H\n"
25512"#error \"Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead.\"\n"
25513"#endif\n"
25514"\n"
25515"#ifndef __MWAITXINTRIN_H\n"
25516"#define __MWAITXINTRIN_H\n"
25517"\n"
25518"/* Define the default attributes for the functions in this file. */\n"
25519"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mwaitx\")))\n"
25520"static __inline__ void __DEFAULT_FN_ATTRS\n"
25521"_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)\n"
25522"{\n"
25523" __builtin_ia32_monitorx((void *)__p, __extensions, __hints);\n"
25524"}\n"
25525"\n"
25526"static __inline__ void __DEFAULT_FN_ATTRS\n"
25527"_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)\n"
25528"{\n"
25529" __builtin_ia32_mwaitx(__extensions, __hints, __clock);\n"
25530"}\n"
25531"\n"
25532"#undef __DEFAULT_FN_ATTRS\n"
25533"\n"
25534"#endif /* __MWAITXINTRIN_H */\n"
25535"" } ,
25536 { "/builtins/nmmintrin.h" , "/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===\n"
25537" *\n"
25538" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
25539" * of this software and associated documentation files (the \"Software\"), to deal\n"
25540" * in the Software without restriction, including without limitation the rights\n"
25541" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
25542" * copies of the Software, and to permit persons to whom the Software is\n"
25543" * furnished to do so, subject to the following conditions:\n"
25544" *\n"
25545" * The above copyright notice and this permission notice shall be included in\n"
25546" * all copies or substantial portions of the Software.\n"
25547" *\n"
25548" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
25549" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
25550" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
25551" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
25552" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
25553" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
25554" * THE SOFTWARE.\n"
25555" *\n"
25556" *===-----------------------------------------------------------------------===\n"
25557" */\n"
25558"\n"
25559"#ifndef __NMMINTRIN_H\n"
25560"#define __NMMINTRIN_H\n"
25561"\n"
25562"/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,\n"
25563" just include it now then. */\n"
25564"#include <smmintrin.h>\n"
25565"#endif /* __NMMINTRIN_H */\n"
25566"" } ,
25567 { "/builtins/omp.h" , "/*\n"
25568" * include/50/omp.h.var\n"
25569" */\n"
25570"\n"
25571"\n"
25572"//===----------------------------------------------------------------------===//\n"
25573"//\n"
25574"// The LLVM Compiler Infrastructure\n"
25575"//\n"
25576"// This file is dual licensed under the MIT and the University of Illinois Open\n"
25577"// Source Licenses. See LICENSE.txt for details.\n"
25578"//\n"
25579"//===----------------------------------------------------------------------===//\n"
25580"\n"
25581"\n"
25582"#ifndef __OMP_H\n"
25583"# define __OMP_H\n"
25584"\n"
25585"# define KMP_VERSION_MAJOR 5\n"
25586"# define KMP_VERSION_MINOR 0\n"
25587"# define KMP_VERSION_BUILD 20140926\n"
25588"# define KMP_BUILD_DATE \"No_Timestamp\"\n"
25589"\n"
25590"# ifdef __cplusplus\n"
25591" extern \"C\" {\n"
25592"# endif\n"
25593"\n"
25594"# if defined(_WIN32)\n"
25595"# define __KAI_KMPC_CONVENTION __cdecl\n"
25596"# else\n"
25597"# define __KAI_KMPC_CONVENTION\n"
25598"# endif\n"
25599"\n"
25600" /* schedule kind constants */\n"
25601" typedef enum omp_sched_t {\n"
25602" omp_sched_static = 1,\n"
25603" omp_sched_dynamic = 2,\n"
25604" omp_sched_guided = 3,\n"
25605" omp_sched_auto = 4\n"
25606" } omp_sched_t;\n"
25607"\n"
25608" /* set API functions */\n"
25609" extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);\n"
25610" extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);\n"
25611" extern void __KAI_KMPC_CONVENTION omp_set_nested (int);\n"
25612" extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);\n"
25613" extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);\n"
25614"\n"
25615" /* query API functions */\n"
25616" extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);\n"
25617" extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);\n"
25618" extern int __KAI_KMPC_CONVENTION omp_get_nested (void);\n"
25619" extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);\n"
25620" extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);\n"
25621" extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);\n"
25622" extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);\n"
25623" extern int __KAI_KMPC_CONVENTION omp_in_final (void);\n"
25624" extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);\n"
25625" extern int __KAI_KMPC_CONVENTION omp_get_level (void);\n"
25626" extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);\n"
25627" extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);\n"
25628" extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);\n"
25629" extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);\n"
25630" extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);\n"
25631" extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void);\n"
25632"\n"
25633" /* lock API functions */\n"
25634" typedef struct omp_lock_t {\n"
25635" void * _lk;\n"
25636" } omp_lock_t;\n"
25637"\n"
25638" extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);\n"
25639" extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);\n"
25640" extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);\n"
25641" extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);\n"
25642" extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);\n"
25643"\n"
25644" /* nested lock API functions */\n"
25645" typedef struct omp_nest_lock_t {\n"
25646" void * _lk;\n"
25647" } omp_nest_lock_t;\n"
25648"\n"
25649" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);\n"
25650" extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);\n"
25651" extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);\n"
25652" extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);\n"
25653" extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);\n"
25654"\n"
25655" /* lock hint type for dynamic user lock */\n"
25656" typedef enum omp_lock_hint_t {\n"
25657" omp_lock_hint_none = 0,\n"
25658" omp_lock_hint_uncontended = 1,\n"
25659" omp_lock_hint_contended = (1<<1 ),\n"
25660" omp_lock_hint_nonspeculative = (1<<2 ),\n"
25661" omp_lock_hint_speculative = (1<<3 ),\n"
25662" kmp_lock_hint_hle = (1<<16),\n"
25663" kmp_lock_hint_rtm = (1<<17),\n"
25664" kmp_lock_hint_adaptive = (1<<18)\n"
25665" } omp_lock_hint_t;\n"
25666"\n"
25667" /* hinted lock initializers */\n"
25668" extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);\n"
25669" extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);\n"
25670"\n"
25671" /* time API functions */\n"
25672" extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);\n"
25673" extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);\n"
25674"\n"
25675" /* OpenMP 4.0 */\n"
25676" extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);\n"
25677" extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);\n"
25678" extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);\n"
25679" extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);\n"
25680" extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);\n"
25681" extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);\n"
25682" extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);\n"
25683"\n"
25684"# include <stdlib.h>\n"
25685" /* OpenMP 4.5 */\n"
25686" extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void);\n"
25687" extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int);\n"
25688" extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int);\n"
25689" extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int);\n"
25690" extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int);\n"
25691" extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *,\n"
25692" const size_t *, const size_t *, const size_t *, const size_t *, int, int);\n"
25693" extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int);\n"
25694" extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int);\n"
25695"\n"
25696" /* kmp API functions */\n"
25697" extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);\n"
25698" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);\n"
25699" extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);\n"
25700" extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);\n"
25701" extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);\n"
25702" extern int __KAI_KMPC_CONVENTION kmp_get_library (void);\n"
25703" extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);\n"
25704" extern void __KAI_KMPC_CONVENTION kmp_set_library (int);\n"
25705" extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);\n"
25706" extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);\n"
25707" extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);\n"
25708" extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);\n"
25709" extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int);\n"
25710"\n"
25711" /* Intel affinity API */\n"
25712" typedef void * kmp_affinity_mask_t;\n"
25713"\n"
25714" extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);\n"
25715" extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);\n"
25716" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);\n"
25717" extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);\n"
25718" extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);\n"
25719" extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
25720" extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
25721" extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);\n"
25722"\n"
25723" /* OpenMP 4.0 affinity API */\n"
25724" typedef enum omp_proc_bind_t {\n"
25725" omp_proc_bind_false = 0,\n"
25726" omp_proc_bind_true = 1,\n"
25727" omp_proc_bind_master = 2,\n"
25728" omp_proc_bind_close = 3,\n"
25729" omp_proc_bind_spread = 4\n"
25730" } omp_proc_bind_t;\n"
25731"\n"
25732" extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);\n"
25733"\n"
25734" /* OpenMP 4.5 affinity API */\n"
25735" extern int __KAI_KMPC_CONVENTION omp_get_num_places (void);\n"
25736" extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int);\n"
25737" extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *);\n"
25738" extern int __KAI_KMPC_CONVENTION omp_get_place_num (void);\n"
25739" extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void);\n"
25740" extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *);\n"
25741"\n"
25742" extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);\n"
25743" extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t);\n"
25744" extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);\n"
25745" extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);\n"
25746" extern void __KAI_KMPC_CONVENTION kmp_free (void *);\n"
25747"\n"
25748" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);\n"
25749" extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);\n"
25750"\n"
25751" /* OpenMP 5.0 Tool Control */\n"
25752" typedef enum omp_control_tool_result_t {\n"
25753" omp_control_tool_notool = -2,\n"
25754" omp_control_tool_nocallback = -1,\n"
25755" omp_control_tool_success = 0,\n"
25756" omp_control_tool_ignored = 1\n"
25757" } omp_control_tool_result_t;\n"
25758"\n"
25759" typedef enum omp_control_tool_t {\n"
25760" omp_control_tool_start = 1,\n"
25761" omp_control_tool_pause = 2,\n"
25762" omp_control_tool_flush = 3,\n"
25763" omp_control_tool_end = 4\n"
25764" } omp_control_tool_t;\n"
25765" \n"
25766" extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);\n"
25767"\n"
25768"# undef __KAI_KMPC_CONVENTION\n"
25769"\n"
25770" /* Warning:\n"
25771" The following typedefs are not standard, deprecated and will be removed in a future release.\n"
25772" */\n"
25773" typedef int omp_int_t;\n"
25774" typedef double omp_wtime_t;\n"
25775"\n"
25776"# ifdef __cplusplus\n"
25777" }\n"
25778"# endif\n"
25779"\n"
25780"#endif /* __OMP_H */\n"
25781"\n"
25782"" } ,
25783 { "/builtins/ompt.h" , "/*\n"
25784" * include/50/ompt.h.var\n"
25785" */\n"
25786"\n"
25787"//===----------------------------------------------------------------------===//\n"
25788"//\n"
25789"// The LLVM Compiler Infrastructure\n"
25790"//\n"
25791"// This file is dual licensed under the MIT and the University of Illinois Open\n"
25792"// Source Licenses. See LICENSE.txt for details.\n"
25793"//\n"
25794"//===----------------------------------------------------------------------===//\n"
25795"\n"
25796"#ifndef __OMPT__\n"
25797"#define __OMPT__\n"
25798"\n"
25799"/*****************************************************************************\n"
25800" * system include files\n"
25801" *****************************************************************************/\n"
25802"\n"
25803"#include <stdint.h>\n"
25804"#include <stddef.h>\n"
25805"\n"
25806"\n"
25807"\n"
25808"/*****************************************************************************\n"
25809" * iteration macros\n"
25810" *****************************************************************************/\n"
25811"\n"
25812"#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n"
25813" macro (ompt_enumerate_states) \\\n"
25814" macro (ompt_enumerate_mutex_impls) \\\n"
25815" \\\n"
25816" macro (ompt_set_callback) \\\n"
25817" macro (ompt_get_callback) \\\n"
25818" \\\n"
25819" macro (ompt_get_state) \\\n"
25820" \\\n"
25821" macro (ompt_get_parallel_info) \\\n"
25822" macro (ompt_get_task_info) \\\n"
25823" macro (ompt_get_thread_data) \\\n"
25824" macro (ompt_get_unique_id) \\\n"
25825" \\\n"
25826" macro(ompt_get_num_procs) \\\n"
25827" macro(ompt_get_num_places) \\\n"
25828" macro(ompt_get_place_proc_ids) \\\n"
25829" macro(ompt_get_place_num) \\\n"
25830" macro(ompt_get_partition_place_nums) \\\n"
25831" macro(ompt_get_proc_id) \\\n"
25832" \\\n"
25833" macro(ompt_get_target_info) \\\n"
25834" macro(ompt_get_num_devices)\n"
25835"\n"
25836"#define FOREACH_OMP_STATE(macro) \\\n"
25837" \\\n"
25838" /* first available state */ \\\n"
25839" macro (omp_state_undefined, 0x102) /* undefined thread state */ \\\n"
25840" \\\n"
25841" /* work states (0..15) */ \\\n"
25842" macro (omp_state_work_serial, 0x000) /* working outside parallel */ \\\n"
25843" macro (omp_state_work_parallel, 0x001) /* working within parallel */ \\\n"
25844" macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \\\n"
25845" \\\n"
25846" /* barrier wait states (16..31) */ \\\n"
25847" macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n"
25848" macro (omp_state_wait_barrier_implicit_parallel, 0x011) \\\n"
25849" /* implicit barrier at the end of parallel region */\\\n"
25850" macro (omp_state_wait_barrier_implicit_workshare, 0x012) \\\n"
25851" /* implicit barrier at the end of worksharing */ \\\n"
25852" macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n"
25853" macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n"
25854" \\\n"
25855" /* task wait states (32..63) */ \\\n"
25856" macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n"
25857" macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n"
25858" \\\n"
25859" /* mutex wait states (64..127) */ \\\n"
25860" macro (omp_state_wait_mutex, 0x040) \\\n"
25861" macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \\\n"
25862" macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \\\n"
25863" macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n"
25864" macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n"
25865" \\\n"
25866" /* target wait states (128..255) */ \\\n"
25867" macro (omp_state_wait_target, 0x080) /* waiting for target region */ \\\n"
25868" macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n"
25869" macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n"
25870" \\\n"
25871" /* misc (256..511) */ \\\n"
25872" macro (omp_state_idle, 0x100) /* waiting for work */ \\\n"
25873" macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \\\n"
25874" \\\n"
25875" /* implementation-specific states (512..) */\n"
25876"\n"
25877"\n"
25878"#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n"
25879" macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \\\n"
25880" macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n"
25881" macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n"
25882" macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n"
25883"\n"
25884"#define FOREACH_OMPT_EVENT(macro) \\\n"
25885" \\\n"
25886" /*--- Mandatory Events ---*/ \\\n"
25887" macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n"
25888" macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n"
25889" \\\n"
25890" macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n"
25891" macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n"
25892" \\\n"
25893" macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n"
25894" macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n"
25895" macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n"
25896" \\\n"
25897" macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n"
25898" macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n"
25899" macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n"
25900" \\\n"
25901" macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n"
25902" \\\n"
25903" macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n"
25904" macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n"
25905" \\\n"
25906" macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n"
25907" macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n"
25908" \\\n"
25909" /* Optional Events */ \\\n"
25910" macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n"
25911" \\\n"
25912" macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n"
25913" \\\n"
25914" macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 18) /* report task dependences */ \\\n"
25915" macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n"
25916" \\\n"
25917" macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n"
25918" \\\n"
25919" macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n"
25920" \\\n"
25921" macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n"
25922" \\\n"
25923" macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n"
25924" \\\n"
25925" macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n"
25926" macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n"
25927" \\\n"
25928" macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n"
25929" macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n"
25930" \\\n"
25931" macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n"
25932" \\\n"
25933" macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n"
25934" \\\n"
25935" macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n"
25936" macro (ompt_callback_idle, ompt_callback_idle_t, 31) /* begin or end idle state */\n"
25937"\n"
25938"\n"
25939"\n"
25940"/*****************************************************************************\n"
25941" * data types\n"
25942" *****************************************************************************/\n"
25943"\n"
25944"/*---------------------\n"
25945" * identifiers\n"
25946" *---------------------*/\n"
25947"\n"
25948"typedef uint64_t ompt_id_t;\n"
25949"#define ompt_id_none 0\n"
25950"\n"
25951"typedef union ompt_data_t {\n"
25952" uint64_t value; /* data initialized by runtime to unique id */\n"
25953" void *ptr; /* pointer under tool control */\n"
25954"} ompt_data_t;\n"
25955"\n"
25956"static const ompt_data_t ompt_data_none = {0};\n"
25957"\n"
25958"typedef uint64_t omp_wait_id_t;\n"
25959"static const omp_wait_id_t omp_wait_id_none = 0;\n"
25960"\n"
25961"typedef void ompt_device_t;\n"
25962"\n"
25963"/*---------------------\n"
25964" * omp_frame_t\n"
25965" *---------------------*/\n"
25966"\n"
25967"typedef struct omp_frame_t {\n"
25968" void *exit_frame; /* next frame is user code */\n"
25969" void *enter_frame; /* previous frame is user code */\n"
25970"} omp_frame_t;\n"
25971"\n"
25972"\n"
25973"/*---------------------\n"
25974" * dependences types\n"
25975" *---------------------*/\n"
25976"\n"
25977"typedef enum ompt_task_dependence_flag_t {\n"
25978" // a two bit field for the dependence type\n"
25979" ompt_task_dependence_type_out = 1,\n"
25980" ompt_task_dependence_type_in = 2,\n"
25981" ompt_task_dependence_type_inout = 3,\n"
25982"} ompt_task_dependence_flag_t;\n"
25983"\n"
25984"typedef struct ompt_task_dependence_t {\n"
25985" void *variable_addr;\n"
25986" unsigned int dependence_flags;\n"
25987"} ompt_task_dependence_t;\n"
25988"\n"
25989"\n"
25990"/*****************************************************************************\n"
25991" * enumerations for thread states and runtime events\n"
25992" *****************************************************************************/\n"
25993"\n"
25994"/*---------------------\n"
25995" * runtime states\n"
25996" *---------------------*/\n"
25997"\n"
25998"typedef enum {\n"
25999"#define omp_state_macro(state, code) state = code,\n"
26000" FOREACH_OMP_STATE(omp_state_macro)\n"
26001"#undef omp_state_macro\n"
26002"} omp_state_t;\n"
26003"\n"
26004"\n"
26005"/*---------------------\n"
26006" * runtime events\n"
26007" *---------------------*/\n"
26008"\n"
26009"typedef enum ompt_callbacks_e{\n"
26010"#define ompt_event_macro(event, callback, eventid) event = eventid,\n"
26011" FOREACH_OMPT_EVENT(ompt_event_macro)\n"
26012"#undef ompt_event_macro\n"
26013"} ompt_callbacks_t;\n"
26014"\n"
26015"\n"
26016"/*---------------------\n"
26017" * set callback results\n"
26018" *---------------------*/\n"
26019"typedef enum ompt_set_result_t {\n"
26020" ompt_set_error = 0,\n"
26021" ompt_set_never = 1,\n"
26022" ompt_set_sometimes = 2,\n"
26023" ompt_set_sometimes_paired = 3,\n"
26024" ompt_set_always = 4\n"
26025"} ompt_set_result_t;\n"
26026"\n"
26027"\n"
26028"/*----------------------\n"
26029" * mutex implementations\n"
26030" *----------------------*/\n"
26031"typedef enum kmp_mutex_impl_t {\n"
26032"#define kmp_mutex_impl_macro(impl, code) impl = code,\n"
26033" FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n"
26034"#undef kmp_mutex_impl_macro\n"
26035"} kmp_mutex_impl_t;\n"
26036"\n"
26037"\n"
26038"/*****************************************************************************\n"
26039" * callback signatures\n"
26040" *****************************************************************************/\n"
26041"\n"
26042"/* initialization */\n"
26043"typedef void (*ompt_interface_fn_t)(void);\n"
26044"\n"
26045"typedef ompt_interface_fn_t (*ompt_function_lookup_t)(\n"
26046" const char * /* entry point to look up */\n"
26047");\n"
26048"\n"
26049"/* threads */\n"
26050"typedef enum ompt_thread_type_t {\n"
26051" ompt_thread_initial = 1, // start the enumeration at 1\n"
26052" ompt_thread_worker = 2,\n"
26053" ompt_thread_other = 3,\n"
26054" ompt_thread_unknown = 4\n"
26055"} ompt_thread_type_t;\n"
26056"\n"
26057"typedef enum ompt_invoker_t {\n"
26058" ompt_invoker_program = 1, /* program invokes master task */\n"
26059" ompt_invoker_runtime = 2 /* runtime invokes master task */\n"
26060"} ompt_invoker_t;\n"
26061"\n"
26062"typedef void (*ompt_callback_thread_begin_t) (\n"
26063" ompt_thread_type_t thread_type, /* type of thread */\n"
26064" ompt_data_t *thread_data /* data of thread */\n"
26065");\n"
26066"\n"
26067"typedef void (*ompt_callback_thread_end_t) (\n"
26068" ompt_data_t *thread_data /* data of thread */\n"
26069");\n"
26070"\n"
26071"typedef void (*ompt_wait_callback_t) (\n"
26072" omp_wait_id_t wait_id /* wait data */\n"
26073");\n"
26074"\n"
26075"/* parallel and workshares */\n"
26076"typedef enum ompt_scope_endpoint_t {\n"
26077" ompt_scope_begin = 1,\n"
26078" ompt_scope_end = 2\n"
26079"} ompt_scope_endpoint_t;\n"
26080"\n"
26081"\n"
26082"/* implicit task */\n"
26083"typedef void (*ompt_callback_implicit_task_t) (\n"
26084" ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */\n"
26085" ompt_data_t *parallel_data, /* data of parallel region */\n"
26086" ompt_data_t *task_data, /* data of implicit task */\n"
26087" unsigned int team_size, /* team size */\n"
26088" unsigned int thread_num /* thread number of calling thread */\n"
26089");\n"
26090"\n"
26091"typedef void (*ompt_callback_parallel_begin_t) (\n"
26092" ompt_data_t *encountering_task_data, /* data of encountering task */\n"
26093" const omp_frame_t *encountering_task_frame, /* frame data of encountering task */\n"
26094" ompt_data_t *parallel_data, /* data of parallel region */\n"
26095" unsigned int requested_team_size, /* requested number of threads in team */\n"
26096" ompt_invoker_t invoker, /* invoker of master task */\n"
26097" const void *codeptr_ra /* return address of runtime call */\n"
26098");\n"
26099"\n"
26100"typedef void (*ompt_callback_parallel_end_t) (\n"
26101" ompt_data_t *parallel_data, /* data of parallel region */\n"
26102" ompt_data_t *encountering_task_data, /* data of encountering task */\n"
26103" ompt_invoker_t invoker, /* invoker of master task */ \n"
26104" const void *codeptr_ra /* return address of runtime call */\n"
26105");\n"
26106"\n"
26107"/* tasks */\n"
26108"typedef enum ompt_task_type_t {\n"
26109" ompt_task_initial = 0x1,\n"
26110" ompt_task_implicit = 0x2,\n"
26111" ompt_task_explicit = 0x4,\n"
26112" ompt_task_target = 0x8,\n"
26113" ompt_task_undeferred = 0x8000000,\n"
26114" ompt_task_untied = 0x10000000,\n"
26115" ompt_task_final = 0x20000000,\n"
26116" ompt_task_mergeable = 0x40000000,\n"
26117" ompt_task_merged = 0x80000000\n"
26118"} ompt_task_type_t;\n"
26119"\n"
26120"typedef enum ompt_task_status_t {\n"
26121" ompt_task_complete = 1,\n"
26122" ompt_task_yield = 2,\n"
26123" ompt_task_cancel = 3,\n"
26124" ompt_task_others = 4\n"
26125"} ompt_task_status_t;\n"
26126"\n"
26127"typedef void (*ompt_callback_task_schedule_t) (\n"
26128" ompt_data_t *prior_task_data, /* data of prior task */\n"
26129" ompt_task_status_t prior_task_status, /* status of prior task */\n"
26130" ompt_data_t *next_task_data /* data of next task */\n"
26131");\n"
26132"\n"
26133"typedef void (*ompt_callback_task_create_t) (\n"
26134" ompt_data_t *encountering_task_data, /* data of parent task */\n"
26135" const omp_frame_t *encountering_task_frame, /* frame data for parent task */\n"
26136" ompt_data_t *new_task_data, /* data of created task */\n"
26137" int type, /* type of created task */\n"
26138" int has_dependences, /* created task has dependences */\n"
26139" const void *codeptr_ra /* return address of runtime call */\n"
26140");\n"
26141"\n"
26142"/* task dependences */\n"
26143"typedef void (*ompt_callback_task_dependences_t) (\n"
26144" ompt_data_t *task_data, /* data of task */\n"
26145" const ompt_task_dependence_t *deps, /* dependences of task */\n"
26146" int ndeps /* dependences count of task */\n"
26147");\n"
26148"\n"
26149"typedef void (*ompt_callback_task_dependence_t) (\n"
26150" ompt_data_t *src_task_data, /* data of source task */\n"
26151" ompt_data_t *sink_task_data /* data of sink task */\n"
26152");\n"
26153"\n"
26154"/* target and device */\n"
26155"typedef enum ompt_target_type_t {\n"
26156" ompt_target = 1,\n"
26157" ompt_target_enter_data = 2,\n"
26158" ompt_target_exit_data = 3,\n"
26159" ompt_target_update = 4\n"
26160"} ompt_target_type_t;\n"
26161"\n"
26162"typedef void (*ompt_callback_target_t) (\n"
26163" ompt_target_type_t kind,\n"
26164" ompt_scope_endpoint_t endpoint,\n"
26165" uint64_t device_num,\n"
26166" ompt_data_t *task_data,\n"
26167" ompt_id_t target_id,\n"
26168" const void *codeptr_ra\n"
26169");\n"
26170"\n"
26171"typedef enum ompt_target_data_op_t {\n"
26172" ompt_target_data_alloc = 1,\n"
26173" ompt_target_data_transfer_to_dev = 2,\n"
26174" ompt_target_data_transfer_from_dev = 3,\n"
26175" ompt_target_data_delete = 4\n"
26176"} ompt_target_data_op_t;\n"
26177"\n"
26178"typedef void (*ompt_callback_target_data_op_t) (\n"
26179" ompt_id_t target_id,\n"
26180" ompt_id_t host_op_id,\n"
26181" ompt_target_data_op_t optype,\n"
26182" void *host_addr,\n"
26183" void *device_addr,\n"
26184" size_t bytes\n"
26185");\n"
26186"\n"
26187"typedef void (*ompt_callback_target_submit_t) (\n"
26188" ompt_id_t target_id,\n"
26189" ompt_id_t host_op_id\n"
26190");\n"
26191"\n"
26192"typedef void (*ompt_callback_target_map_t) (\n"
26193" ompt_id_t target_id,\n"
26194" unsigned int nitems,\n"
26195" void **host_addr,\n"
26196" void **device_addr,\n"
26197" size_t *bytes,\n"
26198" unsigned int *mapping_flags\n"
26199");\n"
26200"\n"
26201"typedef void (*ompt_callback_device_initialize_t) (\n"
26202" uint64_t device_num,\n"
26203" const char *type,\n"
26204" ompt_device_t *device,\n"
26205" ompt_function_lookup_t lookup,\n"
26206" const char *documentation\n"
26207");\n"
26208"\n"
26209"typedef void (*ompt_callback_device_finalize_t) (\n"
26210" uint64_t device_num\n"
26211");\n"
26212"\n"
26213"typedef void (*ompt_callback_device_load_t) (\n"
26214" uint64_t device_num,\n"
26215" const char * filename,\n"
26216" int64_t offset_in_file,\n"
26217" void * vma_in_file,\n"
26218" size_t bytes,\n"
26219" void * host_addr,\n"
26220" void * device_addr,\n"
26221" uint64_t module_id\n"
26222");\n"
26223"\n"
26224"#define ompt_addr_unknown ((void *) ~0)\n"
26225"\n"
26226"typedef void (*ompt_callback_device_unload_t) (\n"
26227" uint64_t device_num,\n"
26228" uint64_t module_id\n"
26229");\n"
26230"\n"
26231"/* control_tool */\n"
26232"typedef int (*ompt_callback_control_tool_t) (\n"
26233" uint64_t command, /* command of control call */\n"
26234" uint64_t modifier, /* modifier of control call */\n"
26235" void *arg, /* argument of control call */\n"
26236" const void *codeptr_ra /* return address of runtime call */\n"
26237");\n"
26238"\n"
26239"typedef enum ompt_mutex_kind_t {\n"
26240" ompt_mutex = 0x10,\n"
26241" ompt_mutex_lock = 0x11,\n"
26242" ompt_mutex_nest_lock = 0x12,\n"
26243" ompt_mutex_critical = 0x13,\n"
26244" ompt_mutex_atomic = 0x14,\n"
26245" ompt_mutex_ordered = 0x20\n"
26246"} ompt_mutex_kind_t;\n"
26247"\n"
26248"typedef void (*ompt_callback_mutex_acquire_t) (\n"
26249" ompt_mutex_kind_t kind, /* mutex kind */\n"
26250" unsigned int hint, /* mutex hint */\n"
26251" unsigned int impl, /* mutex implementation */\n"
26252" omp_wait_id_t wait_id, /* id of object being awaited */\n"
26253" const void *codeptr_ra /* return address of runtime call */\n"
26254");\n"
26255"\n"
26256"typedef void (*ompt_callback_mutex_t) (\n"
26257" ompt_mutex_kind_t kind, /* mutex kind */\n"
26258" omp_wait_id_t wait_id, /* id of object being awaited */\n"
26259" const void *codeptr_ra /* return address of runtime call */\n"
26260");\n"
26261"\n"
26262"typedef void (*ompt_callback_nest_lock_t) (\n"
26263" ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */\n"
26264" omp_wait_id_t wait_id, /* id of object being awaited */\n"
26265" const void *codeptr_ra /* return address of runtime call */\n"
26266");\n"
26267"\n"
26268"typedef void (*ompt_callback_master_t) (\n"
26269" ompt_scope_endpoint_t endpoint, /* endpoint of master region */\n"
26270" ompt_data_t *parallel_data, /* data of parallel region */\n"
26271" ompt_data_t *task_data, /* data of task */\n"
26272" const void *codeptr_ra /* return address of runtime call */\n"
26273");\n"
26274"\n"
26275"typedef void (*ompt_callback_idle_t) (\n"
26276" ompt_scope_endpoint_t endpoint /* endpoint of idle time */\n"
26277");\n"
26278"\n"
26279"typedef enum ompt_work_type_t {\n"
26280" ompt_work_loop = 1,\n"
26281" ompt_work_sections = 2,\n"
26282" ompt_work_single_executor = 3,\n"
26283" ompt_work_single_other = 4,\n"
26284" ompt_work_workshare = 5,\n"
26285" ompt_work_distribute = 6,\n"
26286" ompt_work_taskloop = 7\n"
26287"} ompt_work_type_t;\n"
26288"\n"
26289"typedef void (*ompt_callback_work_t) (\n"
26290" ompt_work_type_t wstype, /* type of work region */\n"
26291" ompt_scope_endpoint_t endpoint, /* endpoint of work region */\n"
26292" ompt_data_t *parallel_data, /* data of parallel region */\n"
26293" ompt_data_t *task_data, /* data of task */\n"
26294" uint64_t count, /* quantity of work */\n"
26295" const void *codeptr_ra /* return address of runtime call */\n"
26296");\n"
26297"\n"
26298"typedef enum ompt_sync_region_kind_t {\n"
26299" ompt_sync_region_barrier = 1,\n"
26300" ompt_sync_region_taskwait = 2,\n"
26301" ompt_sync_region_taskgroup = 3\n"
26302"} ompt_sync_region_kind_t;\n"
26303"\n"
26304"typedef void (*ompt_callback_sync_region_t) (\n"
26305" ompt_sync_region_kind_t kind, /* kind of sync region */\n"
26306" ompt_scope_endpoint_t endpoint, /* endpoint of sync region */\n"
26307" ompt_data_t *parallel_data, /* data of parallel region */\n"
26308" ompt_data_t *task_data, /* data of task */\n"
26309" const void *codeptr_ra /* return address of runtime call */\n"
26310");\n"
26311"\n"
26312"typedef enum ompt_cancel_flag_t {\n"
26313" ompt_cancel_parallel = 0x1,\n"
26314" ompt_cancel_sections = 0x2,\n"
26315" ompt_cancel_do = 0x4,\n"
26316" ompt_cancel_taskgroup = 0x8,\n"
26317" ompt_cancel_activated = 0x10,\n"
26318" ompt_cancel_detected = 0x20,\n"
26319" ompt_cancel_discarded_task = 0x40\n"
26320"} ompt_cancel_flag_t;\n"
26321"\n"
26322"typedef void (*ompt_callback_cancel_t) (\n"
26323" ompt_data_t *task_data, /* data of task */\n"
26324" int flags, /* cancel flags */\n"
26325" const void *codeptr_ra /* return address of runtime call */\n"
26326");\n"
26327"\n"
26328"typedef void (*ompt_callback_flush_t) (\n"
26329" ompt_data_t *thread_data, /* data of thread */\n"
26330" const void *codeptr_ra /* return address of runtime call */\n"
26331");\n"
26332"\n"
26333"/****************************************************************************\n"
26334" * ompt API\n"
26335" ***************************************************************************/\n"
26336"\n"
26337"#ifdef __cplusplus\n"
26338"extern \"C\" {\n"
26339"#endif\n"
26340"\n"
26341"#define OMPT_API_FNTYPE(fn) fn##_t\n"
26342"\n"
26343"#define OMPT_API_FUNCTION(return_type, fn, args) \\\n"
26344" typedef return_type (*OMPT_API_FNTYPE(fn)) args\n"
26345"\n"
26346"\n"
26347"\n"
26348"/****************************************************************************\n"
26349" * INQUIRY FUNCTIONS\n"
26350" ***************************************************************************/\n"
26351"\n"
26352"/* state */\n"
26353"OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (\n"
26354" omp_wait_id_t *wait_id\n"
26355"));\n"
26356"\n"
26357"/* thread */\n"
26358"OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void));\n"
26359"\n"
26360"/* parallel region */\n"
26361"OMPT_API_FUNCTION(int, ompt_get_parallel_info, (\n"
26362" int ancestor_level,\n"
26363" ompt_data_t **parallel_data,\n"
26364" int *team_size\n"
26365"));\n"
26366"\n"
26367"/* task */\n"
26368"OMPT_API_FUNCTION(int, ompt_get_task_info, (\n"
26369" int ancestor_level,\n"
26370" int *type,\n"
26371" ompt_data_t **task_data,\n"
26372" omp_frame_t **task_frame,\n"
26373" ompt_data_t **parallel_data,\n"
26374" int *thread_num\n"
26375"));\n"
26376"\n"
26377"/* procs */\n"
26378"OMPT_API_FUNCTION(int, ompt_get_num_procs, (void));\n"
26379"\n"
26380"/* places */\n"
26381"OMPT_API_FUNCTION(int, ompt_get_num_places, (void));\n"
26382"\n"
26383"OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, (\n"
26384" int place_num,\n"
26385" int ids_size,\n"
26386" int *ids\n"
26387"));\n"
26388"\n"
26389"OMPT_API_FUNCTION(int, ompt_get_place_num, (void));\n"
26390"\n"
26391"OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, (\n"
26392" int place_nums_size,\n"
26393" int *place_nums\n"
26394"));\n"
26395"\n"
26396"/* proc_id */\n"
26397"OMPT_API_FUNCTION(int, ompt_get_proc_id, (void));\n"
26398"\n"
26399"\n"
26400"/****************************************************************************\n"
26401" * INITIALIZATION FUNCTIONS\n"
26402" ***************************************************************************/\n"
26403"\n"
26404"OMPT_API_FUNCTION(int, ompt_initialize, (\n"
26405" ompt_function_lookup_t ompt_fn_lookup,\n"
26406" ompt_data_t *tool_data\n"
26407"));\n"
26408"\n"
26409"OMPT_API_FUNCTION(void, ompt_finalize, (\n"
26410" ompt_data_t *tool_data\n"
26411"));\n"
26412"\n"
26413"typedef struct ompt_start_tool_result_t {\n"
26414" ompt_initialize_t initialize;\n"
26415" ompt_finalize_t finalize;\n"
26416" ompt_data_t tool_data;\n"
26417"} ompt_start_tool_result_t;\n"
26418"\n"
26419"/* initialization interface to be defined by tool */\n"
26420"#ifdef _WIN32\n"
26421"__declspec(dllexport)\n"
26422"#endif\n"
26423"ompt_start_tool_result_t * ompt_start_tool(\n"
26424" unsigned int omp_version, \n"
26425" const char * runtime_version\n"
26426");\n"
26427"\n"
26428"typedef void (*ompt_callback_t)(void);\n"
26429"\n"
26430"OMPT_API_FUNCTION(int, ompt_set_callback, (\n"
26431" ompt_callbacks_t which,\n"
26432" ompt_callback_t callback\n"
26433"));\n"
26434"\n"
26435"OMPT_API_FUNCTION(int, ompt_get_callback, (\n"
26436" ompt_callbacks_t which,\n"
26437" ompt_callback_t *callback\n"
26438"));\n"
26439"\n"
26440"\n"
26441"\n"
26442"/****************************************************************************\n"
26443" * MISCELLANEOUS FUNCTIONS\n"
26444" ***************************************************************************/\n"
26445"\n"
26446"/* state enumeration */\n"
26447"OMPT_API_FUNCTION(int, ompt_enumerate_states, (\n"
26448" int current_state,\n"
26449" int *next_state,\n"
26450" const char **next_state_name\n"
26451"));\n"
26452"\n"
26453"/* mutex implementation enumeration */\n"
26454"OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, (\n"
26455" int current_impl,\n"
26456" int *next_impl,\n"
26457" const char **next_impl_name\n"
26458"));\n"
26459"\n"
26460"/* get_unique_id */\n"
26461"OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void));\n"
26462"\n"
26463"#ifdef __cplusplus\n"
26464"};\n"
26465"#endif\n"
26466"\n"
26467"/****************************************************************************\n"
26468" * TARGET\n"
26469" ***************************************************************************/\n"
26470"\n"
26471" OMPT_API_FUNCTION(int, ompt_get_target_info, (\n"
26472" uint64_t *device_num,\n"
26473" ompt_id_t *target_id,\n"
26474" ompt_id_t *host_op_id\n"
26475"));\n"
26476"\n"
26477" OMPT_API_FUNCTION(int, ompt_get_num_devices, (void));\n"
26478"\n"
26479"#endif /* __OMPT__ */\n"
26480"" } ,
26481 { "/builtins/opencl-c.h" , "//===--- opencl-c.h - OpenCL C language builtin function header -----------===//\n"
26482"//\n"
26483"// The LLVM Compiler Infrastructure\n"
26484"//\n"
26485"// This file is distributed under the University of Illinois Open Source\n"
26486"// License. See LICENSE.TXT for details.\n"
26487"//\n"
26488"//===----------------------------------------------------------------------===//\n"
26489"\n"
26490"#ifndef _OPENCL_H_\n"
26491"#define _OPENCL_H_\n"
26492"\n"
26493"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
26494"#ifndef cl_khr_depth_images\n"
26495"#define cl_khr_depth_images\n"
26496"#endif //cl_khr_depth_images\n"
26497"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
26498"\n"
26499"#if __OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
26500"#ifdef cl_khr_3d_image_writes\n"
26501"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
26502"#endif //cl_khr_3d_image_writes\n"
26503"#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0\n"
26504"\n"
26505"#define __ovld __attribute__((overloadable))\n"
26506"#define __conv __attribute__((convergent))\n"
26507"\n"
26508"// Optimizations\n"
26509"#define __purefn __attribute__((pure))\n"
26510"#define __cnfn __attribute__((const))\n"
26511"\n"
26512"// built-in scalar data types:\n"
26513"\n"
26514"/**\n"
26515" * An unsigned 8-bit integer.\n"
26516" */\n"
26517"typedef unsigned char uchar;\n"
26518"\n"
26519"/**\n"
26520" * An unsigned 16-bit integer.\n"
26521" */\n"
26522"typedef unsigned short ushort;\n"
26523"\n"
26524"/**\n"
26525" * An unsigned 32-bit integer.\n"
26526" */\n"
26527"typedef unsigned int uint;\n"
26528"\n"
26529"/**\n"
26530" * An unsigned 64-bit integer.\n"
26531" */\n"
26532"typedef unsigned long ulong;\n"
26533"\n"
26534"/**\n"
26535" * The unsigned integer type of the result of the sizeof operator. This\n"
26536" * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS\n"
26537" * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if\n"
26538" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
26539" */\n"
26540"typedef __SIZE_TYPE__ size_t;\n"
26541"\n"
26542"/**\n"
26543" * A signed integer type that is the result of subtracting two pointers.\n"
26544" * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS\n"
26545" * defined in table 4.3 is 32-bits and is a 64-bit signed integer if\n"
26546" * CL_DEVICE_ADDRESS_BITS is 64-bits.\n"
26547" */\n"
26548"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
26549"\n"
26550"/**\n"
26551"* A signed integer type with the property that any valid pointer to\n"
26552"* void can be converted to this type, then converted back to pointer\n"
26553"* to void, and the result will compare equal to the original pointer.\n"
26554"*/\n"
26555"typedef __INTPTR_TYPE__ intptr_t;\n"
26556"\n"
26557"/**\n"
26558"* An unsigned integer type with the property that any valid pointer to\n"
26559"* void can be converted to this type, then converted back to pointer\n"
26560"* to void, and the result will compare equal to the original pointer.\n"
26561"*/\n"
26562"typedef __UINTPTR_TYPE__ uintptr_t;\n"
26563"\n"
26564"// built-in vector data types:\n"
26565"typedef char char2 __attribute__((ext_vector_type(2)));\n"
26566"typedef char char3 __attribute__((ext_vector_type(3)));\n"
26567"typedef char char4 __attribute__((ext_vector_type(4)));\n"
26568"typedef char char8 __attribute__((ext_vector_type(8)));\n"
26569"typedef char char16 __attribute__((ext_vector_type(16)));\n"
26570"typedef uchar uchar2 __attribute__((ext_vector_type(2)));\n"
26571"typedef uchar uchar3 __attribute__((ext_vector_type(3)));\n"
26572"typedef uchar uchar4 __attribute__((ext_vector_type(4)));\n"
26573"typedef uchar uchar8 __attribute__((ext_vector_type(8)));\n"
26574"typedef uchar uchar16 __attribute__((ext_vector_type(16)));\n"
26575"typedef short short2 __attribute__((ext_vector_type(2)));\n"
26576"typedef short short3 __attribute__((ext_vector_type(3)));\n"
26577"typedef short short4 __attribute__((ext_vector_type(4)));\n"
26578"typedef short short8 __attribute__((ext_vector_type(8)));\n"
26579"typedef short short16 __attribute__((ext_vector_type(16)));\n"
26580"typedef ushort ushort2 __attribute__((ext_vector_type(2)));\n"
26581"typedef ushort ushort3 __attribute__((ext_vector_type(3)));\n"
26582"typedef ushort ushort4 __attribute__((ext_vector_type(4)));\n"
26583"typedef ushort ushort8 __attribute__((ext_vector_type(8)));\n"
26584"typedef ushort ushort16 __attribute__((ext_vector_type(16)));\n"
26585"typedef int int2 __attribute__((ext_vector_type(2)));\n"
26586"typedef int int3 __attribute__((ext_vector_type(3)));\n"
26587"typedef int int4 __attribute__((ext_vector_type(4)));\n"
26588"typedef int int8 __attribute__((ext_vector_type(8)));\n"
26589"typedef int int16 __attribute__((ext_vector_type(16)));\n"
26590"typedef uint uint2 __attribute__((ext_vector_type(2)));\n"
26591"typedef uint uint3 __attribute__((ext_vector_type(3)));\n"
26592"typedef uint uint4 __attribute__((ext_vector_type(4)));\n"
26593"typedef uint uint8 __attribute__((ext_vector_type(8)));\n"
26594"typedef uint uint16 __attribute__((ext_vector_type(16)));\n"
26595"typedef long long2 __attribute__((ext_vector_type(2)));\n"
26596"typedef long long3 __attribute__((ext_vector_type(3)));\n"
26597"typedef long long4 __attribute__((ext_vector_type(4)));\n"
26598"typedef long long8 __attribute__((ext_vector_type(8)));\n"
26599"typedef long long16 __attribute__((ext_vector_type(16)));\n"
26600"typedef ulong ulong2 __attribute__((ext_vector_type(2)));\n"
26601"typedef ulong ulong3 __attribute__((ext_vector_type(3)));\n"
26602"typedef ulong ulong4 __attribute__((ext_vector_type(4)));\n"
26603"typedef ulong ulong8 __attribute__((ext_vector_type(8)));\n"
26604"typedef ulong ulong16 __attribute__((ext_vector_type(16)));\n"
26605"typedef float float2 __attribute__((ext_vector_type(2)));\n"
26606"typedef float float3 __attribute__((ext_vector_type(3)));\n"
26607"typedef float float4 __attribute__((ext_vector_type(4)));\n"
26608"typedef float float8 __attribute__((ext_vector_type(8)));\n"
26609"typedef float float16 __attribute__((ext_vector_type(16)));\n"
26610"#ifdef cl_khr_fp16\n"
26611"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
26612"typedef half half2 __attribute__((ext_vector_type(2)));\n"
26613"typedef half half3 __attribute__((ext_vector_type(3)));\n"
26614"typedef half half4 __attribute__((ext_vector_type(4)));\n"
26615"typedef half half8 __attribute__((ext_vector_type(8)));\n"
26616"typedef half half16 __attribute__((ext_vector_type(16)));\n"
26617"#endif\n"
26618"#ifdef cl_khr_fp64\n"
26619"#if __OPENCL_C_VERSION__ < CL_VERSION_1_2\n"
26620"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
26621"#endif\n"
26622"typedef double double2 __attribute__((ext_vector_type(2)));\n"
26623"typedef double double3 __attribute__((ext_vector_type(3)));\n"
26624"typedef double double4 __attribute__((ext_vector_type(4)));\n"
26625"typedef double double8 __attribute__((ext_vector_type(8)));\n"
26626"typedef double double16 __attribute__((ext_vector_type(16)));\n"
26627"#endif\n"
26628"\n"
26629"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
26630"#define NULL ((void*)0)\n"
26631"#endif\n"
26632"\n"
26633"/**\n"
26634" * Value of maximum non-infinite single-precision floating-point\n"
26635" * number.\n"
26636" */\n"
26637"#define MAXFLOAT 0x1.fffffep127f\n"
26638"\n"
26639"/**\n"
26640" * A positive float constant expression. HUGE_VALF evaluates\n"
26641" * to +infinity. Used as an error value returned by the built-in\n"
26642" * math functions.\n"
26643" */\n"
26644"#define HUGE_VALF (__builtin_huge_valf())\n"
26645"\n"
26646"/**\n"
26647" * A positive double constant expression. HUGE_VAL evaluates\n"
26648" * to +infinity. Used as an error value returned by the built-in\n"
26649" * math functions.\n"
26650" */\n"
26651"#define HUGE_VAL (__builtin_huge_val())\n"
26652"\n"
26653"/**\n"
26654" * A constant expression of type float representing positive or\n"
26655" * unsigned infinity.\n"
26656" */\n"
26657"#define INFINITY (__builtin_inff())\n"
26658"\n"
26659"/**\n"
26660" * A constant expression of type float representing a quiet NaN.\n"
26661" */\n"
26662"#define NAN as_float(INT_MAX)\n"
26663"\n"
26664"#define FP_ILOGB0 INT_MIN\n"
26665"#define FP_ILOGBNAN INT_MAX\n"
26666"\n"
26667"#define FLT_DIG 6\n"
26668"#define FLT_MANT_DIG 24\n"
26669"#define FLT_MAX_10_EXP +38\n"
26670"#define FLT_MAX_EXP +128\n"
26671"#define FLT_MIN_10_EXP -37\n"
26672"#define FLT_MIN_EXP -125\n"
26673"#define FLT_RADIX 2\n"
26674"#define FLT_MAX 0x1.fffffep127f\n"
26675"#define FLT_MIN 0x1.0p-126f\n"
26676"#define FLT_EPSILON 0x1.0p-23f\n"
26677"\n"
26678"#define M_E_F 2.71828182845904523536028747135266250f\n"
26679"#define M_LOG2E_F 1.44269504088896340735992468100189214f\n"
26680"#define M_LOG10E_F 0.434294481903251827651128918916605082f\n"
26681"#define M_LN2_F 0.693147180559945309417232121458176568f\n"
26682"#define M_LN10_F 2.30258509299404568401799145468436421f\n"
26683"#define M_PI_F 3.14159265358979323846264338327950288f\n"
26684"#define M_PI_2_F 1.57079632679489661923132169163975144f\n"
26685"#define M_PI_4_F 0.785398163397448309615660845819875721f\n"
26686"#define M_1_PI_F 0.318309886183790671537767526745028724f\n"
26687"#define M_2_PI_F 0.636619772367581343075535053490057448f\n"
26688"#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f\n"
26689"#define M_SQRT2_F 1.41421356237309504880168872420969808f\n"
26690"#define M_SQRT1_2_F 0.707106781186547524400844362104849039f\n"
26691"\n"
26692"#define DBL_DIG 15\n"
26693"#define DBL_MANT_DIG 53\n"
26694"#define DBL_MAX_10_EXP +308\n"
26695"#define DBL_MAX_EXP +1024\n"
26696"#define DBL_MIN_10_EXP -307\n"
26697"#define DBL_MIN_EXP -1021\n"
26698"#define DBL_RADIX 2\n"
26699"#define DBL_MAX 0x1.fffffffffffffp1023\n"
26700"#define DBL_MIN 0x1.0p-1022\n"
26701"#define DBL_EPSILON 0x1.0p-52\n"
26702"\n"
26703"#define M_E 0x1.5bf0a8b145769p+1\n"
26704"#define M_LOG2E 0x1.71547652b82fep+0\n"
26705"#define M_LOG10E 0x1.bcb7b1526e50ep-2\n"
26706"#define M_LN2 0x1.62e42fefa39efp-1\n"
26707"#define M_LN10 0x1.26bb1bbb55516p+1\n"
26708"#define M_PI 0x1.921fb54442d18p+1\n"
26709"#define M_PI_2 0x1.921fb54442d18p+0\n"
26710"#define M_PI_4 0x1.921fb54442d18p-1\n"
26711"#define M_1_PI 0x1.45f306dc9c883p-2\n"
26712"#define M_2_PI 0x1.45f306dc9c883p-1\n"
26713"#define M_2_SQRTPI 0x1.20dd750429b6dp+0\n"
26714"#define M_SQRT2 0x1.6a09e667f3bcdp+0\n"
26715"#define M_SQRT1_2 0x1.6a09e667f3bcdp-1\n"
26716"\n"
26717"#ifdef cl_khr_fp16\n"
26718"\n"
26719"#define HALF_DIG 3\n"
26720"#define HALF_MANT_DIG 11\n"
26721"#define HALF_MAX_10_EXP +4\n"
26722"#define HALF_MAX_EXP +16\n"
26723"#define HALF_MIN_10_EXP -4\n"
26724"#define HALF_MIN_EXP -13\n"
26725"#define HALF_RADIX 2\n"
26726"#define HALF_MAX ((0x1.ffcp15h))\n"
26727"#define HALF_MIN ((0x1.0p-14h))\n"
26728"#define HALF_EPSILON ((0x1.0p-10h))\n"
26729"\n"
26730"#define M_E_H 2.71828182845904523536028747135266250h\n"
26731"#define M_LOG2E_H 1.44269504088896340735992468100189214h\n"
26732"#define M_LOG10E_H 0.434294481903251827651128918916605082h\n"
26733"#define M_LN2_H 0.693147180559945309417232121458176568h\n"
26734"#define M_LN10_H 2.30258509299404568401799145468436421h\n"
26735"#define M_PI_H 3.14159265358979323846264338327950288h\n"
26736"#define M_PI_2_H 1.57079632679489661923132169163975144h\n"
26737"#define M_PI_4_H 0.785398163397448309615660845819875721h\n"
26738"#define M_1_PI_H 0.318309886183790671537767526745028724h\n"
26739"#define M_2_PI_H 0.636619772367581343075535053490057448h\n"
26740"#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h\n"
26741"#define M_SQRT2_H 1.41421356237309504880168872420969808h\n"
26742"#define M_SQRT1_2_H 0.707106781186547524400844362104849039h\n"
26743"\n"
26744"#endif //cl_khr_fp16\n"
26745"\n"
26746"#define CHAR_BIT 8\n"
26747"#define SCHAR_MAX 127\n"
26748"#define SCHAR_MIN (-128)\n"
26749"#define UCHAR_MAX 255\n"
26750"#define CHAR_MAX SCHAR_MAX\n"
26751"#define CHAR_MIN SCHAR_MIN\n"
26752"#define USHRT_MAX 65535\n"
26753"#define SHRT_MAX 32767\n"
26754"#define SHRT_MIN (-32768)\n"
26755"#define UINT_MAX 0xffffffff\n"
26756"#define INT_MAX 2147483647\n"
26757"#define INT_MIN (-2147483647-1)\n"
26758"#define ULONG_MAX 0xffffffffffffffffUL\n"
26759"#define LONG_MAX 0x7fffffffffffffffL\n"
26760"#define LONG_MIN (-0x7fffffffffffffffL-1)\n"
26761"\n"
26762"// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions\n"
26763"\n"
26764"char __ovld __cnfn convert_char_rte(char);\n"
26765"char __ovld __cnfn convert_char_sat_rte(char);\n"
26766"char __ovld __cnfn convert_char_rtz(char);\n"
26767"char __ovld __cnfn convert_char_sat_rtz(char);\n"
26768"char __ovld __cnfn convert_char_rtp(char);\n"
26769"char __ovld __cnfn convert_char_sat_rtp(char);\n"
26770"char __ovld __cnfn convert_char_rtn(char);\n"
26771"char __ovld __cnfn convert_char_sat_rtn(char);\n"
26772"char __ovld __cnfn convert_char(char);\n"
26773"char __ovld __cnfn convert_char_sat(char);\n"
26774"char __ovld __cnfn convert_char_rte(uchar);\n"
26775"char __ovld __cnfn convert_char_sat_rte(uchar);\n"
26776"char __ovld __cnfn convert_char_rtz(uchar);\n"
26777"char __ovld __cnfn convert_char_sat_rtz(uchar);\n"
26778"char __ovld __cnfn convert_char_rtp(uchar);\n"
26779"char __ovld __cnfn convert_char_sat_rtp(uchar);\n"
26780"char __ovld __cnfn convert_char_rtn(uchar);\n"
26781"char __ovld __cnfn convert_char_sat_rtn(uchar);\n"
26782"char __ovld __cnfn convert_char(uchar);\n"
26783"char __ovld __cnfn convert_char_sat(uchar);\n"
26784"char __ovld __cnfn convert_char_rte(short);\n"
26785"char __ovld __cnfn convert_char_sat_rte(short);\n"
26786"char __ovld __cnfn convert_char_rtz(short);\n"
26787"char __ovld __cnfn convert_char_sat_rtz(short);\n"
26788"char __ovld __cnfn convert_char_rtp(short);\n"
26789"char __ovld __cnfn convert_char_sat_rtp(short);\n"
26790"char __ovld __cnfn convert_char_rtn(short);\n"
26791"char __ovld __cnfn convert_char_sat_rtn(short);\n"
26792"char __ovld __cnfn convert_char(short);\n"
26793"char __ovld __cnfn convert_char_sat(short);\n"
26794"char __ovld __cnfn convert_char_rte(ushort);\n"
26795"char __ovld __cnfn convert_char_sat_rte(ushort);\n"
26796"char __ovld __cnfn convert_char_rtz(ushort);\n"
26797"char __ovld __cnfn convert_char_sat_rtz(ushort);\n"
26798"char __ovld __cnfn convert_char_rtp(ushort);\n"
26799"char __ovld __cnfn convert_char_sat_rtp(ushort);\n"
26800"char __ovld __cnfn convert_char_rtn(ushort);\n"
26801"char __ovld __cnfn convert_char_sat_rtn(ushort);\n"
26802"char __ovld __cnfn convert_char(ushort);\n"
26803"char __ovld __cnfn convert_char_sat(ushort);\n"
26804"char __ovld __cnfn convert_char_rte(int);\n"
26805"char __ovld __cnfn convert_char_sat_rte(int);\n"
26806"char __ovld __cnfn convert_char_rtz(int);\n"
26807"char __ovld __cnfn convert_char_sat_rtz(int);\n"
26808"char __ovld __cnfn convert_char_rtp(int);\n"
26809"char __ovld __cnfn convert_char_sat_rtp(int);\n"
26810"char __ovld __cnfn convert_char_rtn(int);\n"
26811"char __ovld __cnfn convert_char_sat_rtn(int);\n"
26812"char __ovld __cnfn convert_char(int);\n"
26813"char __ovld __cnfn convert_char_sat(int);\n"
26814"char __ovld __cnfn convert_char_rte(uint);\n"
26815"char __ovld __cnfn convert_char_sat_rte(uint);\n"
26816"char __ovld __cnfn convert_char_rtz(uint);\n"
26817"char __ovld __cnfn convert_char_sat_rtz(uint);\n"
26818"char __ovld __cnfn convert_char_rtp(uint);\n"
26819"char __ovld __cnfn convert_char_sat_rtp(uint);\n"
26820"char __ovld __cnfn convert_char_rtn(uint);\n"
26821"char __ovld __cnfn convert_char_sat_rtn(uint);\n"
26822"char __ovld __cnfn convert_char(uint);\n"
26823"char __ovld __cnfn convert_char_sat(uint);\n"
26824"char __ovld __cnfn convert_char_rte(long);\n"
26825"char __ovld __cnfn convert_char_sat_rte(long);\n"
26826"char __ovld __cnfn convert_char_rtz(long);\n"
26827"char __ovld __cnfn convert_char_sat_rtz(long);\n"
26828"char __ovld __cnfn convert_char_rtp(long);\n"
26829"char __ovld __cnfn convert_char_sat_rtp(long);\n"
26830"char __ovld __cnfn convert_char_rtn(long);\n"
26831"char __ovld __cnfn convert_char_sat_rtn(long);\n"
26832"char __ovld __cnfn convert_char(long);\n"
26833"char __ovld __cnfn convert_char_sat(long);\n"
26834"char __ovld __cnfn convert_char_rte(ulong);\n"
26835"char __ovld __cnfn convert_char_sat_rte(ulong);\n"
26836"char __ovld __cnfn convert_char_rtz(ulong);\n"
26837"char __ovld __cnfn convert_char_sat_rtz(ulong);\n"
26838"char __ovld __cnfn convert_char_rtp(ulong);\n"
26839"char __ovld __cnfn convert_char_sat_rtp(ulong);\n"
26840"char __ovld __cnfn convert_char_rtn(ulong);\n"
26841"char __ovld __cnfn convert_char_sat_rtn(ulong);\n"
26842"char __ovld __cnfn convert_char(ulong);\n"
26843"char __ovld __cnfn convert_char_sat(ulong);\n"
26844"char __ovld __cnfn convert_char_rte(float);\n"
26845"char __ovld __cnfn convert_char_sat_rte(float);\n"
26846"char __ovld __cnfn convert_char_rtz(float);\n"
26847"char __ovld __cnfn convert_char_sat_rtz(float);\n"
26848"char __ovld __cnfn convert_char_rtp(float);\n"
26849"char __ovld __cnfn convert_char_sat_rtp(float);\n"
26850"char __ovld __cnfn convert_char_rtn(float);\n"
26851"char __ovld __cnfn convert_char_sat_rtn(float);\n"
26852"char __ovld __cnfn convert_char(float);\n"
26853"char __ovld __cnfn convert_char_sat(float);\n"
26854"uchar __ovld __cnfn convert_uchar_rte(char);\n"
26855"uchar __ovld __cnfn convert_uchar_sat_rte(char);\n"
26856"uchar __ovld __cnfn convert_uchar_rtz(char);\n"
26857"uchar __ovld __cnfn convert_uchar_sat_rtz(char);\n"
26858"uchar __ovld __cnfn convert_uchar_rtp(char);\n"
26859"uchar __ovld __cnfn convert_uchar_sat_rtp(char);\n"
26860"uchar __ovld __cnfn convert_uchar_rtn(char);\n"
26861"uchar __ovld __cnfn convert_uchar_sat_rtn(char);\n"
26862"uchar __ovld __cnfn convert_uchar(char);\n"
26863"uchar __ovld __cnfn convert_uchar_sat(char);\n"
26864"uchar __ovld __cnfn convert_uchar_rte(uchar);\n"
26865"uchar __ovld __cnfn convert_uchar_sat_rte(uchar);\n"
26866"uchar __ovld __cnfn convert_uchar_rtz(uchar);\n"
26867"uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);\n"
26868"uchar __ovld __cnfn convert_uchar_rtp(uchar);\n"
26869"uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);\n"
26870"uchar __ovld __cnfn convert_uchar_rtn(uchar);\n"
26871"uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);\n"
26872"uchar __ovld __cnfn convert_uchar(uchar);\n"
26873"uchar __ovld __cnfn convert_uchar_sat(uchar);\n"
26874"uchar __ovld __cnfn convert_uchar_rte(short);\n"
26875"uchar __ovld __cnfn convert_uchar_sat_rte(short);\n"
26876"uchar __ovld __cnfn convert_uchar_rtz(short);\n"
26877"uchar __ovld __cnfn convert_uchar_sat_rtz(short);\n"
26878"uchar __ovld __cnfn convert_uchar_rtp(short);\n"
26879"uchar __ovld __cnfn convert_uchar_sat_rtp(short);\n"
26880"uchar __ovld __cnfn convert_uchar_rtn(short);\n"
26881"uchar __ovld __cnfn convert_uchar_sat_rtn(short);\n"
26882"uchar __ovld __cnfn convert_uchar(short);\n"
26883"uchar __ovld __cnfn convert_uchar_sat(short);\n"
26884"uchar __ovld __cnfn convert_uchar_rte(ushort);\n"
26885"uchar __ovld __cnfn convert_uchar_sat_rte(ushort);\n"
26886"uchar __ovld __cnfn convert_uchar_rtz(ushort);\n"
26887"uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);\n"
26888"uchar __ovld __cnfn convert_uchar_rtp(ushort);\n"
26889"uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);\n"
26890"uchar __ovld __cnfn convert_uchar_rtn(ushort);\n"
26891"uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);\n"
26892"uchar __ovld __cnfn convert_uchar(ushort);\n"
26893"uchar __ovld __cnfn convert_uchar_sat(ushort);\n"
26894"uchar __ovld __cnfn convert_uchar_rte(int);\n"
26895"uchar __ovld __cnfn convert_uchar_sat_rte(int);\n"
26896"uchar __ovld __cnfn convert_uchar_rtz(int);\n"
26897"uchar __ovld __cnfn convert_uchar_sat_rtz(int);\n"
26898"uchar __ovld __cnfn convert_uchar_rtp(int);\n"
26899"uchar __ovld __cnfn convert_uchar_sat_rtp(int);\n"
26900"uchar __ovld __cnfn convert_uchar_rtn(int);\n"
26901"uchar __ovld __cnfn convert_uchar_sat_rtn(int);\n"
26902"uchar __ovld __cnfn convert_uchar(int);\n"
26903"uchar __ovld __cnfn convert_uchar_sat(int);\n"
26904"uchar __ovld __cnfn convert_uchar_rte(uint);\n"
26905"uchar __ovld __cnfn convert_uchar_sat_rte(uint);\n"
26906"uchar __ovld __cnfn convert_uchar_rtz(uint);\n"
26907"uchar __ovld __cnfn convert_uchar_sat_rtz(uint);\n"
26908"uchar __ovld __cnfn convert_uchar_rtp(uint);\n"
26909"uchar __ovld __cnfn convert_uchar_sat_rtp(uint);\n"
26910"uchar __ovld __cnfn convert_uchar_rtn(uint);\n"
26911"uchar __ovld __cnfn convert_uchar_sat_rtn(uint);\n"
26912"uchar __ovld __cnfn convert_uchar(uint);\n"
26913"uchar __ovld __cnfn convert_uchar_sat(uint);\n"
26914"uchar __ovld __cnfn convert_uchar_rte(long);\n"
26915"uchar __ovld __cnfn convert_uchar_sat_rte(long);\n"
26916"uchar __ovld __cnfn convert_uchar_rtz(long);\n"
26917"uchar __ovld __cnfn convert_uchar_sat_rtz(long);\n"
26918"uchar __ovld __cnfn convert_uchar_rtp(long);\n"
26919"uchar __ovld __cnfn convert_uchar_sat_rtp(long);\n"
26920"uchar __ovld __cnfn convert_uchar_rtn(long);\n"
26921"uchar __ovld __cnfn convert_uchar_sat_rtn(long);\n"
26922"uchar __ovld __cnfn convert_uchar(long);\n"
26923"uchar __ovld __cnfn convert_uchar_sat(long);\n"
26924"uchar __ovld __cnfn convert_uchar_rte(ulong);\n"
26925"uchar __ovld __cnfn convert_uchar_sat_rte(ulong);\n"
26926"uchar __ovld __cnfn convert_uchar_rtz(ulong);\n"
26927"uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);\n"
26928"uchar __ovld __cnfn convert_uchar_rtp(ulong);\n"
26929"uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);\n"
26930"uchar __ovld __cnfn convert_uchar_rtn(ulong);\n"
26931"uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);\n"
26932"uchar __ovld __cnfn convert_uchar(ulong);\n"
26933"uchar __ovld __cnfn convert_uchar_sat(ulong);\n"
26934"uchar __ovld __cnfn convert_uchar_rte(float);\n"
26935"uchar __ovld __cnfn convert_uchar_sat_rte(float);\n"
26936"uchar __ovld __cnfn convert_uchar_rtz(float);\n"
26937"uchar __ovld __cnfn convert_uchar_sat_rtz(float);\n"
26938"uchar __ovld __cnfn convert_uchar_rtp(float);\n"
26939"uchar __ovld __cnfn convert_uchar_sat_rtp(float);\n"
26940"uchar __ovld __cnfn convert_uchar_rtn(float);\n"
26941"uchar __ovld __cnfn convert_uchar_sat_rtn(float);\n"
26942"uchar __ovld __cnfn convert_uchar(float);\n"
26943"uchar __ovld __cnfn convert_uchar_sat(float);\n"
26944"\n"
26945"short __ovld __cnfn convert_short_rte(char);\n"
26946"short __ovld __cnfn convert_short_sat_rte(char);\n"
26947"short __ovld __cnfn convert_short_rtz(char);\n"
26948"short __ovld __cnfn convert_short_sat_rtz(char);\n"
26949"short __ovld __cnfn convert_short_rtp(char);\n"
26950"short __ovld __cnfn convert_short_sat_rtp(char);\n"
26951"short __ovld __cnfn convert_short_rtn(char);\n"
26952"short __ovld __cnfn convert_short_sat_rtn(char);\n"
26953"short __ovld __cnfn convert_short(char);\n"
26954"short __ovld __cnfn convert_short_sat(char);\n"
26955"short __ovld __cnfn convert_short_rte(uchar);\n"
26956"short __ovld __cnfn convert_short_sat_rte(uchar);\n"
26957"short __ovld __cnfn convert_short_rtz(uchar);\n"
26958"short __ovld __cnfn convert_short_sat_rtz(uchar);\n"
26959"short __ovld __cnfn convert_short_rtp(uchar);\n"
26960"short __ovld __cnfn convert_short_sat_rtp(uchar);\n"
26961"short __ovld __cnfn convert_short_rtn(uchar);\n"
26962"short __ovld __cnfn convert_short_sat_rtn(uchar);\n"
26963"short __ovld __cnfn convert_short(uchar);\n"
26964"short __ovld __cnfn convert_short_sat(uchar);\n"
26965"short __ovld __cnfn convert_short_rte(short);\n"
26966"short __ovld __cnfn convert_short_sat_rte(short);\n"
26967"short __ovld __cnfn convert_short_rtz(short);\n"
26968"short __ovld __cnfn convert_short_sat_rtz(short);\n"
26969"short __ovld __cnfn convert_short_rtp(short);\n"
26970"short __ovld __cnfn convert_short_sat_rtp(short);\n"
26971"short __ovld __cnfn convert_short_rtn(short);\n"
26972"short __ovld __cnfn convert_short_sat_rtn(short);\n"
26973"short __ovld __cnfn convert_short(short);\n"
26974"short __ovld __cnfn convert_short_sat(short);\n"
26975"short __ovld __cnfn convert_short_rte(ushort);\n"
26976"short __ovld __cnfn convert_short_sat_rte(ushort);\n"
26977"short __ovld __cnfn convert_short_rtz(ushort);\n"
26978"short __ovld __cnfn convert_short_sat_rtz(ushort);\n"
26979"short __ovld __cnfn convert_short_rtp(ushort);\n"
26980"short __ovld __cnfn convert_short_sat_rtp(ushort);\n"
26981"short __ovld __cnfn convert_short_rtn(ushort);\n"
26982"short __ovld __cnfn convert_short_sat_rtn(ushort);\n"
26983"short __ovld __cnfn convert_short(ushort);\n"
26984"short __ovld __cnfn convert_short_sat(ushort);\n"
26985"short __ovld __cnfn convert_short_rte(int);\n"
26986"short __ovld __cnfn convert_short_sat_rte(int);\n"
26987"short __ovld __cnfn convert_short_rtz(int);\n"
26988"short __ovld __cnfn convert_short_sat_rtz(int);\n"
26989"short __ovld __cnfn convert_short_rtp(int);\n"
26990"short __ovld __cnfn convert_short_sat_rtp(int);\n"
26991"short __ovld __cnfn convert_short_rtn(int);\n"
26992"short __ovld __cnfn convert_short_sat_rtn(int);\n"
26993"short __ovld __cnfn convert_short(int);\n"
26994"short __ovld __cnfn convert_short_sat(int);\n"
26995"short __ovld __cnfn convert_short_rte(uint);\n"
26996"short __ovld __cnfn convert_short_sat_rte(uint);\n"
26997"short __ovld __cnfn convert_short_rtz(uint);\n"
26998"short __ovld __cnfn convert_short_sat_rtz(uint);\n"
26999"short __ovld __cnfn convert_short_rtp(uint);\n"
27000"short __ovld __cnfn convert_short_sat_rtp(uint);\n"
27001"short __ovld __cnfn convert_short_rtn(uint);\n"
27002"short __ovld __cnfn convert_short_sat_rtn(uint);\n"
27003"short __ovld __cnfn convert_short(uint);\n"
27004"short __ovld __cnfn convert_short_sat(uint);\n"
27005"short __ovld __cnfn convert_short_rte(long);\n"
27006"short __ovld __cnfn convert_short_sat_rte(long);\n"
27007"short __ovld __cnfn convert_short_rtz(long);\n"
27008"short __ovld __cnfn convert_short_sat_rtz(long);\n"
27009"short __ovld __cnfn convert_short_rtp(long);\n"
27010"short __ovld __cnfn convert_short_sat_rtp(long);\n"
27011"short __ovld __cnfn convert_short_rtn(long);\n"
27012"short __ovld __cnfn convert_short_sat_rtn(long);\n"
27013"short __ovld __cnfn convert_short(long);\n"
27014"short __ovld __cnfn convert_short_sat(long);\n"
27015"short __ovld __cnfn convert_short_rte(ulong);\n"
27016"short __ovld __cnfn convert_short_sat_rte(ulong);\n"
27017"short __ovld __cnfn convert_short_rtz(ulong);\n"
27018"short __ovld __cnfn convert_short_sat_rtz(ulong);\n"
27019"short __ovld __cnfn convert_short_rtp(ulong);\n"
27020"short __ovld __cnfn convert_short_sat_rtp(ulong);\n"
27021"short __ovld __cnfn convert_short_rtn(ulong);\n"
27022"short __ovld __cnfn convert_short_sat_rtn(ulong);\n"
27023"short __ovld __cnfn convert_short(ulong);\n"
27024"short __ovld __cnfn convert_short_sat(ulong);\n"
27025"short __ovld __cnfn convert_short_rte(float);\n"
27026"short __ovld __cnfn convert_short_sat_rte(float);\n"
27027"short __ovld __cnfn convert_short_rtz(float);\n"
27028"short __ovld __cnfn convert_short_sat_rtz(float);\n"
27029"short __ovld __cnfn convert_short_rtp(float);\n"
27030"short __ovld __cnfn convert_short_sat_rtp(float);\n"
27031"short __ovld __cnfn convert_short_rtn(float);\n"
27032"short __ovld __cnfn convert_short_sat_rtn(float);\n"
27033"short __ovld __cnfn convert_short(float);\n"
27034"short __ovld __cnfn convert_short_sat(float);\n"
27035"ushort __ovld __cnfn convert_ushort_rte(char);\n"
27036"ushort __ovld __cnfn convert_ushort_sat_rte(char);\n"
27037"ushort __ovld __cnfn convert_ushort_rtz(char);\n"
27038"ushort __ovld __cnfn convert_ushort_sat_rtz(char);\n"
27039"ushort __ovld __cnfn convert_ushort_rtp(char);\n"
27040"ushort __ovld __cnfn convert_ushort_sat_rtp(char);\n"
27041"ushort __ovld __cnfn convert_ushort_rtn(char);\n"
27042"ushort __ovld __cnfn convert_ushort_sat_rtn(char);\n"
27043"ushort __ovld __cnfn convert_ushort(char);\n"
27044"ushort __ovld __cnfn convert_ushort_sat(char);\n"
27045"ushort __ovld __cnfn convert_ushort_rte(uchar);\n"
27046"ushort __ovld __cnfn convert_ushort_sat_rte(uchar);\n"
27047"ushort __ovld __cnfn convert_ushort_rtz(uchar);\n"
27048"ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);\n"
27049"ushort __ovld __cnfn convert_ushort_rtp(uchar);\n"
27050"ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);\n"
27051"ushort __ovld __cnfn convert_ushort_rtn(uchar);\n"
27052"ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);\n"
27053"ushort __ovld __cnfn convert_ushort(uchar);\n"
27054"ushort __ovld __cnfn convert_ushort_sat(uchar);\n"
27055"ushort __ovld __cnfn convert_ushort_rte(short);\n"
27056"ushort __ovld __cnfn convert_ushort_sat_rte(short);\n"
27057"ushort __ovld __cnfn convert_ushort_rtz(short);\n"
27058"ushort __ovld __cnfn convert_ushort_sat_rtz(short);\n"
27059"ushort __ovld __cnfn convert_ushort_rtp(short);\n"
27060"ushort __ovld __cnfn convert_ushort_sat_rtp(short);\n"
27061"ushort __ovld __cnfn convert_ushort_rtn(short);\n"
27062"ushort __ovld __cnfn convert_ushort_sat_rtn(short);\n"
27063"ushort __ovld __cnfn convert_ushort(short);\n"
27064"ushort __ovld __cnfn convert_ushort_sat(short);\n"
27065"ushort __ovld __cnfn convert_ushort_rte(ushort);\n"
27066"ushort __ovld __cnfn convert_ushort_sat_rte(ushort);\n"
27067"ushort __ovld __cnfn convert_ushort_rtz(ushort);\n"
27068"ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);\n"
27069"ushort __ovld __cnfn convert_ushort_rtp(ushort);\n"
27070"ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);\n"
27071"ushort __ovld __cnfn convert_ushort_rtn(ushort);\n"
27072"ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);\n"
27073"ushort __ovld __cnfn convert_ushort(ushort);\n"
27074"ushort __ovld __cnfn convert_ushort_sat(ushort);\n"
27075"ushort __ovld __cnfn convert_ushort_rte(int);\n"
27076"ushort __ovld __cnfn convert_ushort_sat_rte(int);\n"
27077"ushort __ovld __cnfn convert_ushort_rtz(int);\n"
27078"ushort __ovld __cnfn convert_ushort_sat_rtz(int);\n"
27079"ushort __ovld __cnfn convert_ushort_rtp(int);\n"
27080"ushort __ovld __cnfn convert_ushort_sat_rtp(int);\n"
27081"ushort __ovld __cnfn convert_ushort_rtn(int);\n"
27082"ushort __ovld __cnfn convert_ushort_sat_rtn(int);\n"
27083"ushort __ovld __cnfn convert_ushort(int);\n"
27084"ushort __ovld __cnfn convert_ushort_sat(int);\n"
27085"ushort __ovld __cnfn convert_ushort_rte(uint);\n"
27086"ushort __ovld __cnfn convert_ushort_sat_rte(uint);\n"
27087"ushort __ovld __cnfn convert_ushort_rtz(uint);\n"
27088"ushort __ovld __cnfn convert_ushort_sat_rtz(uint);\n"
27089"ushort __ovld __cnfn convert_ushort_rtp(uint);\n"
27090"ushort __ovld __cnfn convert_ushort_sat_rtp(uint);\n"
27091"ushort __ovld __cnfn convert_ushort_rtn(uint);\n"
27092"ushort __ovld __cnfn convert_ushort_sat_rtn(uint);\n"
27093"ushort __ovld __cnfn convert_ushort(uint);\n"
27094"ushort __ovld __cnfn convert_ushort_sat(uint);\n"
27095"ushort __ovld __cnfn convert_ushort_rte(long);\n"
27096"ushort __ovld __cnfn convert_ushort_sat_rte(long);\n"
27097"ushort __ovld __cnfn convert_ushort_rtz(long);\n"
27098"ushort __ovld __cnfn convert_ushort_sat_rtz(long);\n"
27099"ushort __ovld __cnfn convert_ushort_rtp(long);\n"
27100"ushort __ovld __cnfn convert_ushort_sat_rtp(long);\n"
27101"ushort __ovld __cnfn convert_ushort_rtn(long);\n"
27102"ushort __ovld __cnfn convert_ushort_sat_rtn(long);\n"
27103"ushort __ovld __cnfn convert_ushort(long);\n"
27104"ushort __ovld __cnfn convert_ushort_sat(long);\n"
27105"ushort __ovld __cnfn convert_ushort_rte(ulong);\n"
27106"ushort __ovld __cnfn convert_ushort_sat_rte(ulong);\n"
27107"ushort __ovld __cnfn convert_ushort_rtz(ulong);\n"
27108"ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);\n"
27109"ushort __ovld __cnfn convert_ushort_rtp(ulong);\n"
27110"ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);\n"
27111"ushort __ovld __cnfn convert_ushort_rtn(ulong);\n"
27112"ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);\n"
27113"ushort __ovld __cnfn convert_ushort(ulong);\n"
27114"ushort __ovld __cnfn convert_ushort_sat(ulong);\n"
27115"ushort __ovld __cnfn convert_ushort_rte(float);\n"
27116"ushort __ovld __cnfn convert_ushort_sat_rte(float);\n"
27117"ushort __ovld __cnfn convert_ushort_rtz(float);\n"
27118"ushort __ovld __cnfn convert_ushort_sat_rtz(float);\n"
27119"ushort __ovld __cnfn convert_ushort_rtp(float);\n"
27120"ushort __ovld __cnfn convert_ushort_sat_rtp(float);\n"
27121"ushort __ovld __cnfn convert_ushort_rtn(float);\n"
27122"ushort __ovld __cnfn convert_ushort_sat_rtn(float);\n"
27123"ushort __ovld __cnfn convert_ushort(float);\n"
27124"ushort __ovld __cnfn convert_ushort_sat(float);\n"
27125"int __ovld __cnfn convert_int_rte(char);\n"
27126"int __ovld __cnfn convert_int_sat_rte(char);\n"
27127"int __ovld __cnfn convert_int_rtz(char);\n"
27128"int __ovld __cnfn convert_int_sat_rtz(char);\n"
27129"int __ovld __cnfn convert_int_rtp(char);\n"
27130"int __ovld __cnfn convert_int_sat_rtp(char);\n"
27131"int __ovld __cnfn convert_int_rtn(char);\n"
27132"int __ovld __cnfn convert_int_sat_rtn(char);\n"
27133"int __ovld __cnfn convert_int(char);\n"
27134"int __ovld __cnfn convert_int_sat(char);\n"
27135"int __ovld __cnfn convert_int_rte(uchar);\n"
27136"int __ovld __cnfn convert_int_sat_rte(uchar);\n"
27137"int __ovld __cnfn convert_int_rtz(uchar);\n"
27138"int __ovld __cnfn convert_int_sat_rtz(uchar);\n"
27139"int __ovld __cnfn convert_int_rtp(uchar);\n"
27140"int __ovld __cnfn convert_int_sat_rtp(uchar);\n"
27141"int __ovld __cnfn convert_int_rtn(uchar);\n"
27142"int __ovld __cnfn convert_int_sat_rtn(uchar);\n"
27143"int __ovld __cnfn convert_int(uchar);\n"
27144"int __ovld __cnfn convert_int_sat(uchar);\n"
27145"int __ovld __cnfn convert_int_rte(short);\n"
27146"int __ovld __cnfn convert_int_sat_rte(short);\n"
27147"int __ovld __cnfn convert_int_rtz(short);\n"
27148"int __ovld __cnfn convert_int_sat_rtz(short);\n"
27149"int __ovld __cnfn convert_int_rtp(short);\n"
27150"int __ovld __cnfn convert_int_sat_rtp(short);\n"
27151"int __ovld __cnfn convert_int_rtn(short);\n"
27152"int __ovld __cnfn convert_int_sat_rtn(short);\n"
27153"int __ovld __cnfn convert_int(short);\n"
27154"int __ovld __cnfn convert_int_sat(short);\n"
27155"int __ovld __cnfn convert_int_rte(ushort);\n"
27156"int __ovld __cnfn convert_int_sat_rte(ushort);\n"
27157"int __ovld __cnfn convert_int_rtz(ushort);\n"
27158"int __ovld __cnfn convert_int_sat_rtz(ushort);\n"
27159"int __ovld __cnfn convert_int_rtp(ushort);\n"
27160"int __ovld __cnfn convert_int_sat_rtp(ushort);\n"
27161"int __ovld __cnfn convert_int_rtn(ushort);\n"
27162"int __ovld __cnfn convert_int_sat_rtn(ushort);\n"
27163"int __ovld __cnfn convert_int(ushort);\n"
27164"int __ovld __cnfn convert_int_sat(ushort);\n"
27165"int __ovld __cnfn convert_int_rte(int);\n"
27166"int __ovld __cnfn convert_int_sat_rte(int);\n"
27167"int __ovld __cnfn convert_int_rtz(int);\n"
27168"int __ovld __cnfn convert_int_sat_rtz(int);\n"
27169"int __ovld __cnfn convert_int_rtp(int);\n"
27170"int __ovld __cnfn convert_int_sat_rtp(int);\n"
27171"int __ovld __cnfn convert_int_rtn(int);\n"
27172"int __ovld __cnfn convert_int_sat_rtn(int);\n"
27173"int __ovld __cnfn convert_int(int);\n"
27174"int __ovld __cnfn convert_int_sat(int);\n"
27175"int __ovld __cnfn convert_int_rte(uint);\n"
27176"int __ovld __cnfn convert_int_sat_rte(uint);\n"
27177"int __ovld __cnfn convert_int_rtz(uint);\n"
27178"int __ovld __cnfn convert_int_sat_rtz(uint);\n"
27179"int __ovld __cnfn convert_int_rtp(uint);\n"
27180"int __ovld __cnfn convert_int_sat_rtp(uint);\n"
27181"int __ovld __cnfn convert_int_rtn(uint);\n"
27182"int __ovld __cnfn convert_int_sat_rtn(uint);\n"
27183"int __ovld __cnfn convert_int(uint);\n"
27184"int __ovld __cnfn convert_int_sat(uint);\n"
27185"int __ovld __cnfn convert_int_rte(long);\n"
27186"int __ovld __cnfn convert_int_sat_rte(long);\n"
27187"int __ovld __cnfn convert_int_rtz(long);\n"
27188"int __ovld __cnfn convert_int_sat_rtz(long);\n"
27189"int __ovld __cnfn convert_int_rtp(long);\n"
27190"int __ovld __cnfn convert_int_sat_rtp(long);\n"
27191"int __ovld __cnfn convert_int_rtn(long);\n"
27192"int __ovld __cnfn convert_int_sat_rtn(long);\n"
27193"int __ovld __cnfn convert_int(long);\n"
27194"int __ovld __cnfn convert_int_sat(long);\n"
27195"int __ovld __cnfn convert_int_rte(ulong);\n"
27196"int __ovld __cnfn convert_int_sat_rte(ulong);\n"
27197"int __ovld __cnfn convert_int_rtz(ulong);\n"
27198"int __ovld __cnfn convert_int_sat_rtz(ulong);\n"
27199"int __ovld __cnfn convert_int_rtp(ulong);\n"
27200"int __ovld __cnfn convert_int_sat_rtp(ulong);\n"
27201"int __ovld __cnfn convert_int_rtn(ulong);\n"
27202"int __ovld __cnfn convert_int_sat_rtn(ulong);\n"
27203"int __ovld __cnfn convert_int(ulong);\n"
27204"int __ovld __cnfn convert_int_sat(ulong);\n"
27205"int __ovld __cnfn convert_int_rte(float);\n"
27206"int __ovld __cnfn convert_int_sat_rte(float);\n"
27207"int __ovld __cnfn convert_int_rtz(float);\n"
27208"int __ovld __cnfn convert_int_sat_rtz(float);\n"
27209"int __ovld __cnfn convert_int_rtp(float);\n"
27210"int __ovld __cnfn convert_int_sat_rtp(float);\n"
27211"int __ovld __cnfn convert_int_rtn(float);\n"
27212"int __ovld __cnfn convert_int_sat_rtn(float);\n"
27213"int __ovld __cnfn convert_int(float);\n"
27214"int __ovld __cnfn convert_int_sat(float);\n"
27215"uint __ovld __cnfn convert_uint_rte(char);\n"
27216"uint __ovld __cnfn convert_uint_sat_rte(char);\n"
27217"uint __ovld __cnfn convert_uint_rtz(char);\n"
27218"uint __ovld __cnfn convert_uint_sat_rtz(char);\n"
27219"uint __ovld __cnfn convert_uint_rtp(char);\n"
27220"uint __ovld __cnfn convert_uint_sat_rtp(char);\n"
27221"uint __ovld __cnfn convert_uint_rtn(char);\n"
27222"uint __ovld __cnfn convert_uint_sat_rtn(char);\n"
27223"uint __ovld __cnfn convert_uint(char);\n"
27224"uint __ovld __cnfn convert_uint_sat(char);\n"
27225"uint __ovld __cnfn convert_uint_rte(uchar);\n"
27226"uint __ovld __cnfn convert_uint_sat_rte(uchar);\n"
27227"uint __ovld __cnfn convert_uint_rtz(uchar);\n"
27228"uint __ovld __cnfn convert_uint_sat_rtz(uchar);\n"
27229"uint __ovld __cnfn convert_uint_rtp(uchar);\n"
27230"uint __ovld __cnfn convert_uint_sat_rtp(uchar);\n"
27231"uint __ovld __cnfn convert_uint_rtn(uchar);\n"
27232"uint __ovld __cnfn convert_uint_sat_rtn(uchar);\n"
27233"uint __ovld __cnfn convert_uint(uchar);\n"
27234"uint __ovld __cnfn convert_uint_sat(uchar);\n"
27235"uint __ovld __cnfn convert_uint_rte(short);\n"
27236"uint __ovld __cnfn convert_uint_sat_rte(short);\n"
27237"uint __ovld __cnfn convert_uint_rtz(short);\n"
27238"uint __ovld __cnfn convert_uint_sat_rtz(short);\n"
27239"uint __ovld __cnfn convert_uint_rtp(short);\n"
27240"uint __ovld __cnfn convert_uint_sat_rtp(short);\n"
27241"uint __ovld __cnfn convert_uint_rtn(short);\n"
27242"uint __ovld __cnfn convert_uint_sat_rtn(short);\n"
27243"uint __ovld __cnfn convert_uint(short);\n"
27244"uint __ovld __cnfn convert_uint_sat(short);\n"
27245"uint __ovld __cnfn convert_uint_rte(ushort);\n"
27246"uint __ovld __cnfn convert_uint_sat_rte(ushort);\n"
27247"uint __ovld __cnfn convert_uint_rtz(ushort);\n"
27248"uint __ovld __cnfn convert_uint_sat_rtz(ushort);\n"
27249"uint __ovld __cnfn convert_uint_rtp(ushort);\n"
27250"uint __ovld __cnfn convert_uint_sat_rtp(ushort);\n"
27251"uint __ovld __cnfn convert_uint_rtn(ushort);\n"
27252"uint __ovld __cnfn convert_uint_sat_rtn(ushort);\n"
27253"uint __ovld __cnfn convert_uint(ushort);\n"
27254"uint __ovld __cnfn convert_uint_sat(ushort);\n"
27255"uint __ovld __cnfn convert_uint_rte(int);\n"
27256"uint __ovld __cnfn convert_uint_sat_rte(int);\n"
27257"uint __ovld __cnfn convert_uint_rtz(int);\n"
27258"uint __ovld __cnfn convert_uint_sat_rtz(int);\n"
27259"uint __ovld __cnfn convert_uint_rtp(int);\n"
27260"uint __ovld __cnfn convert_uint_sat_rtp(int);\n"
27261"uint __ovld __cnfn convert_uint_rtn(int);\n"
27262"uint __ovld __cnfn convert_uint_sat_rtn(int);\n"
27263"uint __ovld __cnfn convert_uint(int);\n"
27264"uint __ovld __cnfn convert_uint_sat(int);\n"
27265"uint __ovld __cnfn convert_uint_rte(uint);\n"
27266"uint __ovld __cnfn convert_uint_sat_rte(uint);\n"
27267"uint __ovld __cnfn convert_uint_rtz(uint);\n"
27268"uint __ovld __cnfn convert_uint_sat_rtz(uint);\n"
27269"uint __ovld __cnfn convert_uint_rtp(uint);\n"
27270"uint __ovld __cnfn convert_uint_sat_rtp(uint);\n"
27271"uint __ovld __cnfn convert_uint_rtn(uint);\n"
27272"uint __ovld __cnfn convert_uint_sat_rtn(uint);\n"
27273"uint __ovld __cnfn convert_uint(uint);\n"
27274"uint __ovld __cnfn convert_uint_sat(uint);\n"
27275"uint __ovld __cnfn convert_uint_rte(long);\n"
27276"uint __ovld __cnfn convert_uint_sat_rte(long);\n"
27277"uint __ovld __cnfn convert_uint_rtz(long);\n"
27278"uint __ovld __cnfn convert_uint_sat_rtz(long);\n"
27279"uint __ovld __cnfn convert_uint_rtp(long);\n"
27280"uint __ovld __cnfn convert_uint_sat_rtp(long);\n"
27281"uint __ovld __cnfn convert_uint_rtn(long);\n"
27282"uint __ovld __cnfn convert_uint_sat_rtn(long);\n"
27283"uint __ovld __cnfn convert_uint(long);\n"
27284"uint __ovld __cnfn convert_uint_sat(long);\n"
27285"uint __ovld __cnfn convert_uint_rte(ulong);\n"
27286"uint __ovld __cnfn convert_uint_sat_rte(ulong);\n"
27287"uint __ovld __cnfn convert_uint_rtz(ulong);\n"
27288"uint __ovld __cnfn convert_uint_sat_rtz(ulong);\n"
27289"uint __ovld __cnfn convert_uint_rtp(ulong);\n"
27290"uint __ovld __cnfn convert_uint_sat_rtp(ulong);\n"
27291"uint __ovld __cnfn convert_uint_rtn(ulong);\n"
27292"uint __ovld __cnfn convert_uint_sat_rtn(ulong);\n"
27293"uint __ovld __cnfn convert_uint(ulong);\n"
27294"uint __ovld __cnfn convert_uint_sat(ulong);\n"
27295"uint __ovld __cnfn convert_uint_rte(float);\n"
27296"uint __ovld __cnfn convert_uint_sat_rte(float);\n"
27297"uint __ovld __cnfn convert_uint_rtz(float);\n"
27298"uint __ovld __cnfn convert_uint_sat_rtz(float);\n"
27299"uint __ovld __cnfn convert_uint_rtp(float);\n"
27300"uint __ovld __cnfn convert_uint_sat_rtp(float);\n"
27301"uint __ovld __cnfn convert_uint_rtn(float);\n"
27302"uint __ovld __cnfn convert_uint_sat_rtn(float);\n"
27303"uint __ovld __cnfn convert_uint(float);\n"
27304"uint __ovld __cnfn convert_uint_sat(float);\n"
27305"long __ovld __cnfn convert_long_rte(char);\n"
27306"long __ovld __cnfn convert_long_sat_rte(char);\n"
27307"long __ovld __cnfn convert_long_rtz(char);\n"
27308"long __ovld __cnfn convert_long_sat_rtz(char);\n"
27309"long __ovld __cnfn convert_long_rtp(char);\n"
27310"long __ovld __cnfn convert_long_sat_rtp(char);\n"
27311"long __ovld __cnfn convert_long_rtn(char);\n"
27312"long __ovld __cnfn convert_long_sat_rtn(char);\n"
27313"long __ovld __cnfn convert_long(char);\n"
27314"long __ovld __cnfn convert_long_sat(char);\n"
27315"long __ovld __cnfn convert_long_rte(uchar);\n"
27316"long __ovld __cnfn convert_long_sat_rte(uchar);\n"
27317"long __ovld __cnfn convert_long_rtz(uchar);\n"
27318"long __ovld __cnfn convert_long_sat_rtz(uchar);\n"
27319"long __ovld __cnfn convert_long_rtp(uchar);\n"
27320"long __ovld __cnfn convert_long_sat_rtp(uchar);\n"
27321"long __ovld __cnfn convert_long_rtn(uchar);\n"
27322"long __ovld __cnfn convert_long_sat_rtn(uchar);\n"
27323"long __ovld __cnfn convert_long(uchar);\n"
27324"long __ovld __cnfn convert_long_sat(uchar);\n"
27325"long __ovld __cnfn convert_long_rte(short);\n"
27326"long __ovld __cnfn convert_long_sat_rte(short);\n"
27327"long __ovld __cnfn convert_long_rtz(short);\n"
27328"long __ovld __cnfn convert_long_sat_rtz(short);\n"
27329"long __ovld __cnfn convert_long_rtp(short);\n"
27330"long __ovld __cnfn convert_long_sat_rtp(short);\n"
27331"long __ovld __cnfn convert_long_rtn(short);\n"
27332"long __ovld __cnfn convert_long_sat_rtn(short);\n"
27333"long __ovld __cnfn convert_long(short);\n"
27334"long __ovld __cnfn convert_long_sat(short);\n"
27335"long __ovld __cnfn convert_long_rte(ushort);\n"
27336"long __ovld __cnfn convert_long_sat_rte(ushort);\n"
27337"long __ovld __cnfn convert_long_rtz(ushort);\n"
27338"long __ovld __cnfn convert_long_sat_rtz(ushort);\n"
27339"long __ovld __cnfn convert_long_rtp(ushort);\n"
27340"long __ovld __cnfn convert_long_sat_rtp(ushort);\n"
27341"long __ovld __cnfn convert_long_rtn(ushort);\n"
27342"long __ovld __cnfn convert_long_sat_rtn(ushort);\n"
27343"long __ovld __cnfn convert_long(ushort);\n"
27344"long __ovld __cnfn convert_long_sat(ushort);\n"
27345"long __ovld __cnfn convert_long_rte(int);\n"
27346"long __ovld __cnfn convert_long_sat_rte(int);\n"
27347"long __ovld __cnfn convert_long_rtz(int);\n"
27348"long __ovld __cnfn convert_long_sat_rtz(int);\n"
27349"long __ovld __cnfn convert_long_rtp(int);\n"
27350"long __ovld __cnfn convert_long_sat_rtp(int);\n"
27351"long __ovld __cnfn convert_long_rtn(int);\n"
27352"long __ovld __cnfn convert_long_sat_rtn(int);\n"
27353"long __ovld __cnfn convert_long(int);\n"
27354"long __ovld __cnfn convert_long_sat(int);\n"
27355"long __ovld __cnfn convert_long_rte(uint);\n"
27356"long __ovld __cnfn convert_long_sat_rte(uint);\n"
27357"long __ovld __cnfn convert_long_rtz(uint);\n"
27358"long __ovld __cnfn convert_long_sat_rtz(uint);\n"
27359"long __ovld __cnfn convert_long_rtp(uint);\n"
27360"long __ovld __cnfn convert_long_sat_rtp(uint);\n"
27361"long __ovld __cnfn convert_long_rtn(uint);\n"
27362"long __ovld __cnfn convert_long_sat_rtn(uint);\n"
27363"long __ovld __cnfn convert_long(uint);\n"
27364"long __ovld __cnfn convert_long_sat(uint);\n"
27365"long __ovld __cnfn convert_long_rte(long);\n"
27366"long __ovld __cnfn convert_long_sat_rte(long);\n"
27367"long __ovld __cnfn convert_long_rtz(long);\n"
27368"long __ovld __cnfn convert_long_sat_rtz(long);\n"
27369"long __ovld __cnfn convert_long_rtp(long);\n"
27370"long __ovld __cnfn convert_long_sat_rtp(long);\n"
27371"long __ovld __cnfn convert_long_rtn(long);\n"
27372"long __ovld __cnfn convert_long_sat_rtn(long);\n"
27373"long __ovld __cnfn convert_long(long);\n"
27374"long __ovld __cnfn convert_long_sat(long);\n"
27375"long __ovld __cnfn convert_long_rte(ulong);\n"
27376"long __ovld __cnfn convert_long_sat_rte(ulong);\n"
27377"long __ovld __cnfn convert_long_rtz(ulong);\n"
27378"long __ovld __cnfn convert_long_sat_rtz(ulong);\n"
27379"long __ovld __cnfn convert_long_rtp(ulong);\n"
27380"long __ovld __cnfn convert_long_sat_rtp(ulong);\n"
27381"long __ovld __cnfn convert_long_rtn(ulong);\n"
27382"long __ovld __cnfn convert_long_sat_rtn(ulong);\n"
27383"long __ovld __cnfn convert_long(ulong);\n"
27384"long __ovld __cnfn convert_long_sat(ulong);\n"
27385"long __ovld __cnfn convert_long_rte(float);\n"
27386"long __ovld __cnfn convert_long_sat_rte(float);\n"
27387"long __ovld __cnfn convert_long_rtz(float);\n"
27388"long __ovld __cnfn convert_long_sat_rtz(float);\n"
27389"long __ovld __cnfn convert_long_rtp(float);\n"
27390"long __ovld __cnfn convert_long_sat_rtp(float);\n"
27391"long __ovld __cnfn convert_long_rtn(float);\n"
27392"long __ovld __cnfn convert_long_sat_rtn(float);\n"
27393"long __ovld __cnfn convert_long(float);\n"
27394"long __ovld __cnfn convert_long_sat(float);\n"
27395"ulong __ovld __cnfn convert_ulong_rte(char);\n"
27396"ulong __ovld __cnfn convert_ulong_sat_rte(char);\n"
27397"ulong __ovld __cnfn convert_ulong_rtz(char);\n"
27398"ulong __ovld __cnfn convert_ulong_sat_rtz(char);\n"
27399"ulong __ovld __cnfn convert_ulong_rtp(char);\n"
27400"ulong __ovld __cnfn convert_ulong_sat_rtp(char);\n"
27401"ulong __ovld __cnfn convert_ulong_rtn(char);\n"
27402"ulong __ovld __cnfn convert_ulong_sat_rtn(char);\n"
27403"ulong __ovld __cnfn convert_ulong(char);\n"
27404"ulong __ovld __cnfn convert_ulong_sat(char);\n"
27405"ulong __ovld __cnfn convert_ulong_rte(uchar);\n"
27406"ulong __ovld __cnfn convert_ulong_sat_rte(uchar);\n"
27407"ulong __ovld __cnfn convert_ulong_rtz(uchar);\n"
27408"ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);\n"
27409"ulong __ovld __cnfn convert_ulong_rtp(uchar);\n"
27410"ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);\n"
27411"ulong __ovld __cnfn convert_ulong_rtn(uchar);\n"
27412"ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);\n"
27413"ulong __ovld __cnfn convert_ulong(uchar);\n"
27414"ulong __ovld __cnfn convert_ulong_sat(uchar);\n"
27415"ulong __ovld __cnfn convert_ulong_rte(short);\n"
27416"ulong __ovld __cnfn convert_ulong_sat_rte(short);\n"
27417"ulong __ovld __cnfn convert_ulong_rtz(short);\n"
27418"ulong __ovld __cnfn convert_ulong_sat_rtz(short);\n"
27419"ulong __ovld __cnfn convert_ulong_rtp(short);\n"
27420"ulong __ovld __cnfn convert_ulong_sat_rtp(short);\n"
27421"ulong __ovld __cnfn convert_ulong_rtn(short);\n"
27422"ulong __ovld __cnfn convert_ulong_sat_rtn(short);\n"
27423"ulong __ovld __cnfn convert_ulong(short);\n"
27424"ulong __ovld __cnfn convert_ulong_sat(short);\n"
27425"ulong __ovld __cnfn convert_ulong_rte(ushort);\n"
27426"ulong __ovld __cnfn convert_ulong_sat_rte(ushort);\n"
27427"ulong __ovld __cnfn convert_ulong_rtz(ushort);\n"
27428"ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);\n"
27429"ulong __ovld __cnfn convert_ulong_rtp(ushort);\n"
27430"ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);\n"
27431"ulong __ovld __cnfn convert_ulong_rtn(ushort);\n"
27432"ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);\n"
27433"ulong __ovld __cnfn convert_ulong(ushort);\n"
27434"ulong __ovld __cnfn convert_ulong_sat(ushort);\n"
27435"ulong __ovld __cnfn convert_ulong_rte(int);\n"
27436"ulong __ovld __cnfn convert_ulong_sat_rte(int);\n"
27437"ulong __ovld __cnfn convert_ulong_rtz(int);\n"
27438"ulong __ovld __cnfn convert_ulong_sat_rtz(int);\n"
27439"ulong __ovld __cnfn convert_ulong_rtp(int);\n"
27440"ulong __ovld __cnfn convert_ulong_sat_rtp(int);\n"
27441"ulong __ovld __cnfn convert_ulong_rtn(int);\n"
27442"ulong __ovld __cnfn convert_ulong_sat_rtn(int);\n"
27443"ulong __ovld __cnfn convert_ulong(int);\n"
27444"ulong __ovld __cnfn convert_ulong_sat(int);\n"
27445"ulong __ovld __cnfn convert_ulong_rte(uint);\n"
27446"ulong __ovld __cnfn convert_ulong_sat_rte(uint);\n"
27447"ulong __ovld __cnfn convert_ulong_rtz(uint);\n"
27448"ulong __ovld __cnfn convert_ulong_sat_rtz(uint);\n"
27449"ulong __ovld __cnfn convert_ulong_rtp(uint);\n"
27450"ulong __ovld __cnfn convert_ulong_sat_rtp(uint);\n"
27451"ulong __ovld __cnfn convert_ulong_rtn(uint);\n"
27452"ulong __ovld __cnfn convert_ulong_sat_rtn(uint);\n"
27453"ulong __ovld __cnfn convert_ulong(uint);\n"
27454"ulong __ovld __cnfn convert_ulong_sat(uint);\n"
27455"ulong __ovld __cnfn convert_ulong_rte(long);\n"
27456"ulong __ovld __cnfn convert_ulong_sat_rte(long);\n"
27457"ulong __ovld __cnfn convert_ulong_rtz(long);\n"
27458"ulong __ovld __cnfn convert_ulong_sat_rtz(long);\n"
27459"ulong __ovld __cnfn convert_ulong_rtp(long);\n"
27460"ulong __ovld __cnfn convert_ulong_sat_rtp(long);\n"
27461"ulong __ovld __cnfn convert_ulong_rtn(long);\n"
27462"ulong __ovld __cnfn convert_ulong_sat_rtn(long);\n"
27463"ulong __ovld __cnfn convert_ulong(long);\n"
27464"ulong __ovld __cnfn convert_ulong_sat(long);\n"
27465"ulong __ovld __cnfn convert_ulong_rte(ulong);\n"
27466"ulong __ovld __cnfn convert_ulong_sat_rte(ulong);\n"
27467"ulong __ovld __cnfn convert_ulong_rtz(ulong);\n"
27468"ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);\n"
27469"ulong __ovld __cnfn convert_ulong_rtp(ulong);\n"
27470"ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);\n"
27471"ulong __ovld __cnfn convert_ulong_rtn(ulong);\n"
27472"ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);\n"
27473"ulong __ovld __cnfn convert_ulong(ulong);\n"
27474"ulong __ovld __cnfn convert_ulong_sat(ulong);\n"
27475"ulong __ovld __cnfn convert_ulong_rte(float);\n"
27476"ulong __ovld __cnfn convert_ulong_sat_rte(float);\n"
27477"ulong __ovld __cnfn convert_ulong_rtz(float);\n"
27478"ulong __ovld __cnfn convert_ulong_sat_rtz(float);\n"
27479"ulong __ovld __cnfn convert_ulong_rtp(float);\n"
27480"ulong __ovld __cnfn convert_ulong_sat_rtp(float);\n"
27481"ulong __ovld __cnfn convert_ulong_rtn(float);\n"
27482"ulong __ovld __cnfn convert_ulong_sat_rtn(float);\n"
27483"ulong __ovld __cnfn convert_ulong(float);\n"
27484"ulong __ovld __cnfn convert_ulong_sat(float);\n"
27485"float __ovld __cnfn convert_float_rte(char);\n"
27486"float __ovld __cnfn convert_float_rtz(char);\n"
27487"float __ovld __cnfn convert_float_rtp(char);\n"
27488"float __ovld __cnfn convert_float_rtn(char);\n"
27489"float __ovld __cnfn convert_float(char);\n"
27490"float __ovld __cnfn convert_float_rte(uchar);\n"
27491"float __ovld __cnfn convert_float_rtz(uchar);\n"
27492"float __ovld __cnfn convert_float_rtp(uchar);\n"
27493"float __ovld __cnfn convert_float_rtn(uchar);\n"
27494"float __ovld __cnfn convert_float(uchar);\n"
27495"float __ovld __cnfn convert_float_rte(short);\n"
27496"float __ovld __cnfn convert_float_rtz(short);\n"
27497"float __ovld __cnfn convert_float_rtp(short);\n"
27498"float __ovld __cnfn convert_float_rtn(short);\n"
27499"float __ovld __cnfn convert_float(short);\n"
27500"float __ovld __cnfn convert_float_rte(ushort);\n"
27501"float __ovld __cnfn convert_float_rtz(ushort);\n"
27502"float __ovld __cnfn convert_float_rtp(ushort);\n"
27503"float __ovld __cnfn convert_float_rtn(ushort);\n"
27504"float __ovld __cnfn convert_float(ushort);\n"
27505"float __ovld __cnfn convert_float_rte(int);\n"
27506"float __ovld __cnfn convert_float_rtz(int);\n"
27507"float __ovld __cnfn convert_float_rtp(int);\n"
27508"float __ovld __cnfn convert_float_rtn(int);\n"
27509"float __ovld __cnfn convert_float(int);\n"
27510"float __ovld __cnfn convert_float_rte(uint);\n"
27511"float __ovld __cnfn convert_float_rtz(uint);\n"
27512"float __ovld __cnfn convert_float_rtp(uint);\n"
27513"float __ovld __cnfn convert_float_rtn(uint);\n"
27514"float __ovld __cnfn convert_float(uint);\n"
27515"float __ovld __cnfn convert_float_rte(long);\n"
27516"float __ovld __cnfn convert_float_rtz(long);\n"
27517"float __ovld __cnfn convert_float_rtp(long);\n"
27518"float __ovld __cnfn convert_float_rtn(long);\n"
27519"float __ovld __cnfn convert_float(long);\n"
27520"float __ovld __cnfn convert_float_rte(ulong);\n"
27521"float __ovld __cnfn convert_float_rtz(ulong);\n"
27522"float __ovld __cnfn convert_float_rtp(ulong);\n"
27523"float __ovld __cnfn convert_float_rtn(ulong);\n"
27524"float __ovld __cnfn convert_float(ulong);\n"
27525"float __ovld __cnfn convert_float_rte(float);\n"
27526"float __ovld __cnfn convert_float_rtz(float);\n"
27527"float __ovld __cnfn convert_float_rtp(float);\n"
27528"float __ovld __cnfn convert_float_rtn(float);\n"
27529"float __ovld __cnfn convert_float(float);\n"
27530"char2 __ovld __cnfn convert_char2_rte(char2);\n"
27531"char2 __ovld __cnfn convert_char2_sat_rte(char2);\n"
27532"char2 __ovld __cnfn convert_char2_rtz(char2);\n"
27533"char2 __ovld __cnfn convert_char2_sat_rtz(char2);\n"
27534"char2 __ovld __cnfn convert_char2_rtp(char2);\n"
27535"char2 __ovld __cnfn convert_char2_sat_rtp(char2);\n"
27536"char2 __ovld __cnfn convert_char2_rtn(char2);\n"
27537"char2 __ovld __cnfn convert_char2_sat_rtn(char2);\n"
27538"char2 __ovld __cnfn convert_char2(char2);\n"
27539"char2 __ovld __cnfn convert_char2_sat(char2);\n"
27540"char2 __ovld __cnfn convert_char2_rte(uchar2);\n"
27541"char2 __ovld __cnfn convert_char2_sat_rte(uchar2);\n"
27542"char2 __ovld __cnfn convert_char2_rtz(uchar2);\n"
27543"char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);\n"
27544"char2 __ovld __cnfn convert_char2_rtp(uchar2);\n"
27545"char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);\n"
27546"char2 __ovld __cnfn convert_char2_rtn(uchar2);\n"
27547"char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);\n"
27548"char2 __ovld __cnfn convert_char2(uchar2);\n"
27549"char2 __ovld __cnfn convert_char2_sat(uchar2);\n"
27550"char2 __ovld __cnfn convert_char2_rte(short2);\n"
27551"char2 __ovld __cnfn convert_char2_sat_rte(short2);\n"
27552"char2 __ovld __cnfn convert_char2_rtz(short2);\n"
27553"char2 __ovld __cnfn convert_char2_sat_rtz(short2);\n"
27554"char2 __ovld __cnfn convert_char2_rtp(short2);\n"
27555"char2 __ovld __cnfn convert_char2_sat_rtp(short2);\n"
27556"char2 __ovld __cnfn convert_char2_rtn(short2);\n"
27557"char2 __ovld __cnfn convert_char2_sat_rtn(short2);\n"
27558"char2 __ovld __cnfn convert_char2(short2);\n"
27559"char2 __ovld __cnfn convert_char2_sat(short2);\n"
27560"char2 __ovld __cnfn convert_char2_rte(ushort2);\n"
27561"char2 __ovld __cnfn convert_char2_sat_rte(ushort2);\n"
27562"char2 __ovld __cnfn convert_char2_rtz(ushort2);\n"
27563"char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);\n"
27564"char2 __ovld __cnfn convert_char2_rtp(ushort2);\n"
27565"char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);\n"
27566"char2 __ovld __cnfn convert_char2_rtn(ushort2);\n"
27567"char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);\n"
27568"char2 __ovld __cnfn convert_char2(ushort2);\n"
27569"char2 __ovld __cnfn convert_char2_sat(ushort2);\n"
27570"char2 __ovld __cnfn convert_char2_rte(int2);\n"
27571"char2 __ovld __cnfn convert_char2_sat_rte(int2);\n"
27572"char2 __ovld __cnfn convert_char2_rtz(int2);\n"
27573"char2 __ovld __cnfn convert_char2_sat_rtz(int2);\n"
27574"char2 __ovld __cnfn convert_char2_rtp(int2);\n"
27575"char2 __ovld __cnfn convert_char2_sat_rtp(int2);\n"
27576"char2 __ovld __cnfn convert_char2_rtn(int2);\n"
27577"char2 __ovld __cnfn convert_char2_sat_rtn(int2);\n"
27578"char2 __ovld __cnfn convert_char2(int2);\n"
27579"char2 __ovld __cnfn convert_char2_sat(int2);\n"
27580"char2 __ovld __cnfn convert_char2_rte(uint2);\n"
27581"char2 __ovld __cnfn convert_char2_sat_rte(uint2);\n"
27582"char2 __ovld __cnfn convert_char2_rtz(uint2);\n"
27583"char2 __ovld __cnfn convert_char2_sat_rtz(uint2);\n"
27584"char2 __ovld __cnfn convert_char2_rtp(uint2);\n"
27585"char2 __ovld __cnfn convert_char2_sat_rtp(uint2);\n"
27586"char2 __ovld __cnfn convert_char2_rtn(uint2);\n"
27587"char2 __ovld __cnfn convert_char2_sat_rtn(uint2);\n"
27588"char2 __ovld __cnfn convert_char2(uint2);\n"
27589"char2 __ovld __cnfn convert_char2_sat(uint2);\n"
27590"char2 __ovld __cnfn convert_char2_rte(long2);\n"
27591"char2 __ovld __cnfn convert_char2_sat_rte(long2);\n"
27592"char2 __ovld __cnfn convert_char2_rtz(long2);\n"
27593"char2 __ovld __cnfn convert_char2_sat_rtz(long2);\n"
27594"char2 __ovld __cnfn convert_char2_rtp(long2);\n"
27595"char2 __ovld __cnfn convert_char2_sat_rtp(long2);\n"
27596"char2 __ovld __cnfn convert_char2_rtn(long2);\n"
27597"char2 __ovld __cnfn convert_char2_sat_rtn(long2);\n"
27598"char2 __ovld __cnfn convert_char2(long2);\n"
27599"char2 __ovld __cnfn convert_char2_sat(long2);\n"
27600"char2 __ovld __cnfn convert_char2_rte(ulong2);\n"
27601"char2 __ovld __cnfn convert_char2_sat_rte(ulong2);\n"
27602"char2 __ovld __cnfn convert_char2_rtz(ulong2);\n"
27603"char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);\n"
27604"char2 __ovld __cnfn convert_char2_rtp(ulong2);\n"
27605"char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);\n"
27606"char2 __ovld __cnfn convert_char2_rtn(ulong2);\n"
27607"char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);\n"
27608"char2 __ovld __cnfn convert_char2(ulong2);\n"
27609"char2 __ovld __cnfn convert_char2_sat(ulong2);\n"
27610"char2 __ovld __cnfn convert_char2_rte(float2);\n"
27611"char2 __ovld __cnfn convert_char2_sat_rte(float2);\n"
27612"char2 __ovld __cnfn convert_char2_rtz(float2);\n"
27613"char2 __ovld __cnfn convert_char2_sat_rtz(float2);\n"
27614"char2 __ovld __cnfn convert_char2_rtp(float2);\n"
27615"char2 __ovld __cnfn convert_char2_sat_rtp(float2);\n"
27616"char2 __ovld __cnfn convert_char2_rtn(float2);\n"
27617"char2 __ovld __cnfn convert_char2_sat_rtn(float2);\n"
27618"char2 __ovld __cnfn convert_char2(float2);\n"
27619"char2 __ovld __cnfn convert_char2_sat(float2);\n"
27620"uchar2 __ovld __cnfn convert_uchar2_rte(char2);\n"
27621"uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);\n"
27622"uchar2 __ovld __cnfn convert_uchar2_rtz(char2);\n"
27623"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);\n"
27624"uchar2 __ovld __cnfn convert_uchar2_rtp(char2);\n"
27625"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);\n"
27626"uchar2 __ovld __cnfn convert_uchar2_rtn(char2);\n"
27627"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);\n"
27628"uchar2 __ovld __cnfn convert_uchar2(char2);\n"
27629"uchar2 __ovld __cnfn convert_uchar2_sat(char2);\n"
27630"uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);\n"
27631"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);\n"
27632"uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);\n"
27633"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);\n"
27634"uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);\n"
27635"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);\n"
27636"uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);\n"
27637"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);\n"
27638"uchar2 __ovld __cnfn convert_uchar2(uchar2);\n"
27639"uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);\n"
27640"uchar2 __ovld __cnfn convert_uchar2_rte(short2);\n"
27641"uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);\n"
27642"uchar2 __ovld __cnfn convert_uchar2_rtz(short2);\n"
27643"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);\n"
27644"uchar2 __ovld __cnfn convert_uchar2_rtp(short2);\n"
27645"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);\n"
27646"uchar2 __ovld __cnfn convert_uchar2_rtn(short2);\n"
27647"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);\n"
27648"uchar2 __ovld __cnfn convert_uchar2(short2);\n"
27649"uchar2 __ovld __cnfn convert_uchar2_sat(short2);\n"
27650"uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);\n"
27651"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);\n"
27652"uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);\n"
27653"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);\n"
27654"uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);\n"
27655"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);\n"
27656"uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);\n"
27657"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);\n"
27658"uchar2 __ovld __cnfn convert_uchar2(ushort2);\n"
27659"uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);\n"
27660"uchar2 __ovld __cnfn convert_uchar2_rte(int2);\n"
27661"uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);\n"
27662"uchar2 __ovld __cnfn convert_uchar2_rtz(int2);\n"
27663"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);\n"
27664"uchar2 __ovld __cnfn convert_uchar2_rtp(int2);\n"
27665"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);\n"
27666"uchar2 __ovld __cnfn convert_uchar2_rtn(int2);\n"
27667"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);\n"
27668"uchar2 __ovld __cnfn convert_uchar2(int2);\n"
27669"uchar2 __ovld __cnfn convert_uchar2_sat(int2);\n"
27670"uchar2 __ovld __cnfn convert_uchar2_rte(uint2);\n"
27671"uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);\n"
27672"uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);\n"
27673"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);\n"
27674"uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);\n"
27675"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);\n"
27676"uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);\n"
27677"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);\n"
27678"uchar2 __ovld __cnfn convert_uchar2(uint2);\n"
27679"uchar2 __ovld __cnfn convert_uchar2_sat(uint2);\n"
27680"uchar2 __ovld __cnfn convert_uchar2_rte(long2);\n"
27681"uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);\n"
27682"uchar2 __ovld __cnfn convert_uchar2_rtz(long2);\n"
27683"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);\n"
27684"uchar2 __ovld __cnfn convert_uchar2_rtp(long2);\n"
27685"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);\n"
27686"uchar2 __ovld __cnfn convert_uchar2_rtn(long2);\n"
27687"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);\n"
27688"uchar2 __ovld __cnfn convert_uchar2(long2);\n"
27689"uchar2 __ovld __cnfn convert_uchar2_sat(long2);\n"
27690"uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);\n"
27691"uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);\n"
27692"uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);\n"
27693"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);\n"
27694"uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);\n"
27695"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);\n"
27696"uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);\n"
27697"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);\n"
27698"uchar2 __ovld __cnfn convert_uchar2(ulong2);\n"
27699"uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);\n"
27700"uchar2 __ovld __cnfn convert_uchar2_rte(float2);\n"
27701"uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);\n"
27702"uchar2 __ovld __cnfn convert_uchar2_rtz(float2);\n"
27703"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);\n"
27704"uchar2 __ovld __cnfn convert_uchar2_rtp(float2);\n"
27705"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);\n"
27706"uchar2 __ovld __cnfn convert_uchar2_rtn(float2);\n"
27707"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);\n"
27708"uchar2 __ovld __cnfn convert_uchar2(float2);\n"
27709"uchar2 __ovld __cnfn convert_uchar2_sat(float2);\n"
27710"short2 __ovld __cnfn convert_short2_rte(char2);\n"
27711"short2 __ovld __cnfn convert_short2_sat_rte(char2);\n"
27712"short2 __ovld __cnfn convert_short2_rtz(char2);\n"
27713"short2 __ovld __cnfn convert_short2_sat_rtz(char2);\n"
27714"short2 __ovld __cnfn convert_short2_rtp(char2);\n"
27715"short2 __ovld __cnfn convert_short2_sat_rtp(char2);\n"
27716"short2 __ovld __cnfn convert_short2_rtn(char2);\n"
27717"short2 __ovld __cnfn convert_short2_sat_rtn(char2);\n"
27718"short2 __ovld __cnfn convert_short2(char2);\n"
27719"short2 __ovld __cnfn convert_short2_sat(char2);\n"
27720"short2 __ovld __cnfn convert_short2_rte(uchar2);\n"
27721"short2 __ovld __cnfn convert_short2_sat_rte(uchar2);\n"
27722"short2 __ovld __cnfn convert_short2_rtz(uchar2);\n"
27723"short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);\n"
27724"short2 __ovld __cnfn convert_short2_rtp(uchar2);\n"
27725"short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);\n"
27726"short2 __ovld __cnfn convert_short2_rtn(uchar2);\n"
27727"short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);\n"
27728"short2 __ovld __cnfn convert_short2(uchar2);\n"
27729"short2 __ovld __cnfn convert_short2_sat(uchar2);\n"
27730"short2 __ovld __cnfn convert_short2_rte(short2);\n"
27731"short2 __ovld __cnfn convert_short2_sat_rte(short2);\n"
27732"short2 __ovld __cnfn convert_short2_rtz(short2);\n"
27733"short2 __ovld __cnfn convert_short2_sat_rtz(short2);\n"
27734"short2 __ovld __cnfn convert_short2_rtp(short2);\n"
27735"short2 __ovld __cnfn convert_short2_sat_rtp(short2);\n"
27736"short2 __ovld __cnfn convert_short2_rtn(short2);\n"
27737"short2 __ovld __cnfn convert_short2_sat_rtn(short2);\n"
27738"short2 __ovld __cnfn convert_short2(short2);\n"
27739"short2 __ovld __cnfn convert_short2_sat(short2);\n"
27740"short2 __ovld __cnfn convert_short2_rte(ushort2);\n"
27741"short2 __ovld __cnfn convert_short2_sat_rte(ushort2);\n"
27742"short2 __ovld __cnfn convert_short2_rtz(ushort2);\n"
27743"short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);\n"
27744"short2 __ovld __cnfn convert_short2_rtp(ushort2);\n"
27745"short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);\n"
27746"short2 __ovld __cnfn convert_short2_rtn(ushort2);\n"
27747"short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);\n"
27748"short2 __ovld __cnfn convert_short2(ushort2);\n"
27749"short2 __ovld __cnfn convert_short2_sat(ushort2);\n"
27750"short2 __ovld __cnfn convert_short2_rte(int2);\n"
27751"short2 __ovld __cnfn convert_short2_sat_rte(int2);\n"
27752"short2 __ovld __cnfn convert_short2_rtz(int2);\n"
27753"short2 __ovld __cnfn convert_short2_sat_rtz(int2);\n"
27754"short2 __ovld __cnfn convert_short2_rtp(int2);\n"
27755"short2 __ovld __cnfn convert_short2_sat_rtp(int2);\n"
27756"short2 __ovld __cnfn convert_short2_rtn(int2);\n"
27757"short2 __ovld __cnfn convert_short2_sat_rtn(int2);\n"
27758"short2 __ovld __cnfn convert_short2(int2);\n"
27759"short2 __ovld __cnfn convert_short2_sat(int2);\n"
27760"short2 __ovld __cnfn convert_short2_rte(uint2);\n"
27761"short2 __ovld __cnfn convert_short2_sat_rte(uint2);\n"
27762"short2 __ovld __cnfn convert_short2_rtz(uint2);\n"
27763"short2 __ovld __cnfn convert_short2_sat_rtz(uint2);\n"
27764"short2 __ovld __cnfn convert_short2_rtp(uint2);\n"
27765"short2 __ovld __cnfn convert_short2_sat_rtp(uint2);\n"
27766"short2 __ovld __cnfn convert_short2_rtn(uint2);\n"
27767"short2 __ovld __cnfn convert_short2_sat_rtn(uint2);\n"
27768"short2 __ovld __cnfn convert_short2(uint2);\n"
27769"short2 __ovld __cnfn convert_short2_sat(uint2);\n"
27770"short2 __ovld __cnfn convert_short2_rte(long2);\n"
27771"short2 __ovld __cnfn convert_short2_sat_rte(long2);\n"
27772"short2 __ovld __cnfn convert_short2_rtz(long2);\n"
27773"short2 __ovld __cnfn convert_short2_sat_rtz(long2);\n"
27774"short2 __ovld __cnfn convert_short2_rtp(long2);\n"
27775"short2 __ovld __cnfn convert_short2_sat_rtp(long2);\n"
27776"short2 __ovld __cnfn convert_short2_rtn(long2);\n"
27777"short2 __ovld __cnfn convert_short2_sat_rtn(long2);\n"
27778"short2 __ovld __cnfn convert_short2(long2);\n"
27779"short2 __ovld __cnfn convert_short2_sat(long2);\n"
27780"short2 __ovld __cnfn convert_short2_rte(ulong2);\n"
27781"short2 __ovld __cnfn convert_short2_sat_rte(ulong2);\n"
27782"short2 __ovld __cnfn convert_short2_rtz(ulong2);\n"
27783"short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);\n"
27784"short2 __ovld __cnfn convert_short2_rtp(ulong2);\n"
27785"short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);\n"
27786"short2 __ovld __cnfn convert_short2_rtn(ulong2);\n"
27787"short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);\n"
27788"short2 __ovld __cnfn convert_short2(ulong2);\n"
27789"short2 __ovld __cnfn convert_short2_sat(ulong2);\n"
27790"short2 __ovld __cnfn convert_short2_rte(float2);\n"
27791"short2 __ovld __cnfn convert_short2_sat_rte(float2);\n"
27792"short2 __ovld __cnfn convert_short2_rtz(float2);\n"
27793"short2 __ovld __cnfn convert_short2_sat_rtz(float2);\n"
27794"short2 __ovld __cnfn convert_short2_rtp(float2);\n"
27795"short2 __ovld __cnfn convert_short2_sat_rtp(float2);\n"
27796"short2 __ovld __cnfn convert_short2_rtn(float2);\n"
27797"short2 __ovld __cnfn convert_short2_sat_rtn(float2);\n"
27798"short2 __ovld __cnfn convert_short2(float2);\n"
27799"short2 __ovld __cnfn convert_short2_sat(float2);\n"
27800"ushort2 __ovld __cnfn convert_ushort2_rte(char2);\n"
27801"ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);\n"
27802"ushort2 __ovld __cnfn convert_ushort2_rtz(char2);\n"
27803"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);\n"
27804"ushort2 __ovld __cnfn convert_ushort2_rtp(char2);\n"
27805"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);\n"
27806"ushort2 __ovld __cnfn convert_ushort2_rtn(char2);\n"
27807"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);\n"
27808"ushort2 __ovld __cnfn convert_ushort2(char2);\n"
27809"ushort2 __ovld __cnfn convert_ushort2_sat(char2);\n"
27810"ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);\n"
27811"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);\n"
27812"ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);\n"
27813"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);\n"
27814"ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);\n"
27815"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);\n"
27816"ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);\n"
27817"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);\n"
27818"ushort2 __ovld __cnfn convert_ushort2(uchar2);\n"
27819"ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);\n"
27820"ushort2 __ovld __cnfn convert_ushort2_rte(short2);\n"
27821"ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);\n"
27822"ushort2 __ovld __cnfn convert_ushort2_rtz(short2);\n"
27823"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);\n"
27824"ushort2 __ovld __cnfn convert_ushort2_rtp(short2);\n"
27825"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);\n"
27826"ushort2 __ovld __cnfn convert_ushort2_rtn(short2);\n"
27827"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);\n"
27828"ushort2 __ovld __cnfn convert_ushort2(short2);\n"
27829"ushort2 __ovld __cnfn convert_ushort2_sat(short2);\n"
27830"ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);\n"
27831"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);\n"
27832"ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);\n"
27833"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);\n"
27834"ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);\n"
27835"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);\n"
27836"ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);\n"
27837"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);\n"
27838"ushort2 __ovld __cnfn convert_ushort2(ushort2);\n"
27839"ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);\n"
27840"ushort2 __ovld __cnfn convert_ushort2_rte(int2);\n"
27841"ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);\n"
27842"ushort2 __ovld __cnfn convert_ushort2_rtz(int2);\n"
27843"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);\n"
27844"ushort2 __ovld __cnfn convert_ushort2_rtp(int2);\n"
27845"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);\n"
27846"ushort2 __ovld __cnfn convert_ushort2_rtn(int2);\n"
27847"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);\n"
27848"ushort2 __ovld __cnfn convert_ushort2(int2);\n"
27849"ushort2 __ovld __cnfn convert_ushort2_sat(int2);\n"
27850"ushort2 __ovld __cnfn convert_ushort2_rte(uint2);\n"
27851"ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);\n"
27852"ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);\n"
27853"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);\n"
27854"ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);\n"
27855"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);\n"
27856"ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);\n"
27857"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);\n"
27858"ushort2 __ovld __cnfn convert_ushort2(uint2);\n"
27859"ushort2 __ovld __cnfn convert_ushort2_sat(uint2);\n"
27860"ushort2 __ovld __cnfn convert_ushort2_rte(long2);\n"
27861"ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);\n"
27862"ushort2 __ovld __cnfn convert_ushort2_rtz(long2);\n"
27863"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);\n"
27864"ushort2 __ovld __cnfn convert_ushort2_rtp(long2);\n"
27865"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);\n"
27866"ushort2 __ovld __cnfn convert_ushort2_rtn(long2);\n"
27867"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);\n"
27868"ushort2 __ovld __cnfn convert_ushort2(long2);\n"
27869"ushort2 __ovld __cnfn convert_ushort2_sat(long2);\n"
27870"ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);\n"
27871"ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);\n"
27872"ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);\n"
27873"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);\n"
27874"ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);\n"
27875"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);\n"
27876"ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);\n"
27877"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);\n"
27878"ushort2 __ovld __cnfn convert_ushort2(ulong2);\n"
27879"ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);\n"
27880"ushort2 __ovld __cnfn convert_ushort2_rte(float2);\n"
27881"ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);\n"
27882"ushort2 __ovld __cnfn convert_ushort2_rtz(float2);\n"
27883"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);\n"
27884"ushort2 __ovld __cnfn convert_ushort2_rtp(float2);\n"
27885"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);\n"
27886"ushort2 __ovld __cnfn convert_ushort2_rtn(float2);\n"
27887"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);\n"
27888"ushort2 __ovld __cnfn convert_ushort2(float2);\n"
27889"ushort2 __ovld __cnfn convert_ushort2_sat(float2);\n"
27890"int2 __ovld __cnfn convert_int2_rte(char2);\n"
27891"int2 __ovld __cnfn convert_int2_sat_rte(char2);\n"
27892"int2 __ovld __cnfn convert_int2_rtz(char2);\n"
27893"int2 __ovld __cnfn convert_int2_sat_rtz(char2);\n"
27894"int2 __ovld __cnfn convert_int2_rtp(char2);\n"
27895"int2 __ovld __cnfn convert_int2_sat_rtp(char2);\n"
27896"int2 __ovld __cnfn convert_int2_rtn(char2);\n"
27897"int2 __ovld __cnfn convert_int2_sat_rtn(char2);\n"
27898"int2 __ovld __cnfn convert_int2(char2);\n"
27899"int2 __ovld __cnfn convert_int2_sat(char2);\n"
27900"int2 __ovld __cnfn convert_int2_rte(uchar2);\n"
27901"int2 __ovld __cnfn convert_int2_sat_rte(uchar2);\n"
27902"int2 __ovld __cnfn convert_int2_rtz(uchar2);\n"
27903"int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);\n"
27904"int2 __ovld __cnfn convert_int2_rtp(uchar2);\n"
27905"int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);\n"
27906"int2 __ovld __cnfn convert_int2_rtn(uchar2);\n"
27907"int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);\n"
27908"int2 __ovld __cnfn convert_int2(uchar2);\n"
27909"int2 __ovld __cnfn convert_int2_sat(uchar2);\n"
27910"int2 __ovld __cnfn convert_int2_rte(short2);\n"
27911"int2 __ovld __cnfn convert_int2_sat_rte(short2);\n"
27912"int2 __ovld __cnfn convert_int2_rtz(short2);\n"
27913"int2 __ovld __cnfn convert_int2_sat_rtz(short2);\n"
27914"int2 __ovld __cnfn convert_int2_rtp(short2);\n"
27915"int2 __ovld __cnfn convert_int2_sat_rtp(short2);\n"
27916"int2 __ovld __cnfn convert_int2_rtn(short2);\n"
27917"int2 __ovld __cnfn convert_int2_sat_rtn(short2);\n"
27918"int2 __ovld __cnfn convert_int2(short2);\n"
27919"int2 __ovld __cnfn convert_int2_sat(short2);\n"
27920"int2 __ovld __cnfn convert_int2_rte(ushort2);\n"
27921"int2 __ovld __cnfn convert_int2_sat_rte(ushort2);\n"
27922"int2 __ovld __cnfn convert_int2_rtz(ushort2);\n"
27923"int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);\n"
27924"int2 __ovld __cnfn convert_int2_rtp(ushort2);\n"
27925"int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);\n"
27926"int2 __ovld __cnfn convert_int2_rtn(ushort2);\n"
27927"int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);\n"
27928"int2 __ovld __cnfn convert_int2(ushort2);\n"
27929"int2 __ovld __cnfn convert_int2_sat(ushort2);\n"
27930"int2 __ovld __cnfn convert_int2_rte(int2);\n"
27931"int2 __ovld __cnfn convert_int2_sat_rte(int2);\n"
27932"int2 __ovld __cnfn convert_int2_rtz(int2);\n"
27933"int2 __ovld __cnfn convert_int2_sat_rtz(int2);\n"
27934"int2 __ovld __cnfn convert_int2_rtp(int2);\n"
27935"int2 __ovld __cnfn convert_int2_sat_rtp(int2);\n"
27936"int2 __ovld __cnfn convert_int2_rtn(int2);\n"
27937"int2 __ovld __cnfn convert_int2_sat_rtn(int2);\n"
27938"int2 __ovld __cnfn convert_int2(int2);\n"
27939"int2 __ovld __cnfn convert_int2_sat(int2);\n"
27940"int2 __ovld __cnfn convert_int2_rte(uint2);\n"
27941"int2 __ovld __cnfn convert_int2_sat_rte(uint2);\n"
27942"int2 __ovld __cnfn convert_int2_rtz(uint2);\n"
27943"int2 __ovld __cnfn convert_int2_sat_rtz(uint2);\n"
27944"int2 __ovld __cnfn convert_int2_rtp(uint2);\n"
27945"int2 __ovld __cnfn convert_int2_sat_rtp(uint2);\n"
27946"int2 __ovld __cnfn convert_int2_rtn(uint2);\n"
27947"int2 __ovld __cnfn convert_int2_sat_rtn(uint2);\n"
27948"int2 __ovld __cnfn convert_int2(uint2);\n"
27949"int2 __ovld __cnfn convert_int2_sat(uint2);\n"
27950"int2 __ovld __cnfn convert_int2_rte(long2);\n"
27951"int2 __ovld __cnfn convert_int2_sat_rte(long2);\n"
27952"int2 __ovld __cnfn convert_int2_rtz(long2);\n"
27953"int2 __ovld __cnfn convert_int2_sat_rtz(long2);\n"
27954"int2 __ovld __cnfn convert_int2_rtp(long2);\n"
27955"int2 __ovld __cnfn convert_int2_sat_rtp(long2);\n"
27956"int2 __ovld __cnfn convert_int2_rtn(long2);\n"
27957"int2 __ovld __cnfn convert_int2_sat_rtn(long2);\n"
27958"int2 __ovld __cnfn convert_int2(long2);\n"
27959"int2 __ovld __cnfn convert_int2_sat(long2);\n"
27960"int2 __ovld __cnfn convert_int2_rte(ulong2);\n"
27961"int2 __ovld __cnfn convert_int2_sat_rte(ulong2);\n"
27962"int2 __ovld __cnfn convert_int2_rtz(ulong2);\n"
27963"int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);\n"
27964"int2 __ovld __cnfn convert_int2_rtp(ulong2);\n"
27965"int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);\n"
27966"int2 __ovld __cnfn convert_int2_rtn(ulong2);\n"
27967"int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);\n"
27968"int2 __ovld __cnfn convert_int2(ulong2);\n"
27969"int2 __ovld __cnfn convert_int2_sat(ulong2);\n"
27970"int2 __ovld __cnfn convert_int2_rte(float2);\n"
27971"int2 __ovld __cnfn convert_int2_sat_rte(float2);\n"
27972"int2 __ovld __cnfn convert_int2_rtz(float2);\n"
27973"int2 __ovld __cnfn convert_int2_sat_rtz(float2);\n"
27974"int2 __ovld __cnfn convert_int2_rtp(float2);\n"
27975"int2 __ovld __cnfn convert_int2_sat_rtp(float2);\n"
27976"int2 __ovld __cnfn convert_int2_rtn(float2);\n"
27977"int2 __ovld __cnfn convert_int2_sat_rtn(float2);\n"
27978"int2 __ovld __cnfn convert_int2(float2);\n"
27979"int2 __ovld __cnfn convert_int2_sat(float2);\n"
27980"uint2 __ovld __cnfn convert_uint2_rte(char2);\n"
27981"uint2 __ovld __cnfn convert_uint2_sat_rte(char2);\n"
27982"uint2 __ovld __cnfn convert_uint2_rtz(char2);\n"
27983"uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);\n"
27984"uint2 __ovld __cnfn convert_uint2_rtp(char2);\n"
27985"uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);\n"
27986"uint2 __ovld __cnfn convert_uint2_rtn(char2);\n"
27987"uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);\n"
27988"uint2 __ovld __cnfn convert_uint2(char2);\n"
27989"uint2 __ovld __cnfn convert_uint2_sat(char2);\n"
27990"uint2 __ovld __cnfn convert_uint2_rte(uchar2);\n"
27991"uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);\n"
27992"uint2 __ovld __cnfn convert_uint2_rtz(uchar2);\n"
27993"uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);\n"
27994"uint2 __ovld __cnfn convert_uint2_rtp(uchar2);\n"
27995"uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);\n"
27996"uint2 __ovld __cnfn convert_uint2_rtn(uchar2);\n"
27997"uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);\n"
27998"uint2 __ovld __cnfn convert_uint2(uchar2);\n"
27999"uint2 __ovld __cnfn convert_uint2_sat(uchar2);\n"
28000"uint2 __ovld __cnfn convert_uint2_rte(short2);\n"
28001"uint2 __ovld __cnfn convert_uint2_sat_rte(short2);\n"
28002"uint2 __ovld __cnfn convert_uint2_rtz(short2);\n"
28003"uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);\n"
28004"uint2 __ovld __cnfn convert_uint2_rtp(short2);\n"
28005"uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);\n"
28006"uint2 __ovld __cnfn convert_uint2_rtn(short2);\n"
28007"uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);\n"
28008"uint2 __ovld __cnfn convert_uint2(short2);\n"
28009"uint2 __ovld __cnfn convert_uint2_sat(short2);\n"
28010"uint2 __ovld __cnfn convert_uint2_rte(ushort2);\n"
28011"uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);\n"
28012"uint2 __ovld __cnfn convert_uint2_rtz(ushort2);\n"
28013"uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);\n"
28014"uint2 __ovld __cnfn convert_uint2_rtp(ushort2);\n"
28015"uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);\n"
28016"uint2 __ovld __cnfn convert_uint2_rtn(ushort2);\n"
28017"uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);\n"
28018"uint2 __ovld __cnfn convert_uint2(ushort2);\n"
28019"uint2 __ovld __cnfn convert_uint2_sat(ushort2);\n"
28020"uint2 __ovld __cnfn convert_uint2_rte(int2);\n"
28021"uint2 __ovld __cnfn convert_uint2_sat_rte(int2);\n"
28022"uint2 __ovld __cnfn convert_uint2_rtz(int2);\n"
28023"uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);\n"
28024"uint2 __ovld __cnfn convert_uint2_rtp(int2);\n"
28025"uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);\n"
28026"uint2 __ovld __cnfn convert_uint2_rtn(int2);\n"
28027"uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);\n"
28028"uint2 __ovld __cnfn convert_uint2(int2);\n"
28029"uint2 __ovld __cnfn convert_uint2_sat(int2);\n"
28030"uint2 __ovld __cnfn convert_uint2_rte(uint2);\n"
28031"uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);\n"
28032"uint2 __ovld __cnfn convert_uint2_rtz(uint2);\n"
28033"uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);\n"
28034"uint2 __ovld __cnfn convert_uint2_rtp(uint2);\n"
28035"uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);\n"
28036"uint2 __ovld __cnfn convert_uint2_rtn(uint2);\n"
28037"uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);\n"
28038"uint2 __ovld __cnfn convert_uint2(uint2);\n"
28039"uint2 __ovld __cnfn convert_uint2_sat(uint2);\n"
28040"uint2 __ovld __cnfn convert_uint2_rte(long2);\n"
28041"uint2 __ovld __cnfn convert_uint2_sat_rte(long2);\n"
28042"uint2 __ovld __cnfn convert_uint2_rtz(long2);\n"
28043"uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);\n"
28044"uint2 __ovld __cnfn convert_uint2_rtp(long2);\n"
28045"uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);\n"
28046"uint2 __ovld __cnfn convert_uint2_rtn(long2);\n"
28047"uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);\n"
28048"uint2 __ovld __cnfn convert_uint2(long2);\n"
28049"uint2 __ovld __cnfn convert_uint2_sat(long2);\n"
28050"uint2 __ovld __cnfn convert_uint2_rte(ulong2);\n"
28051"uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);\n"
28052"uint2 __ovld __cnfn convert_uint2_rtz(ulong2);\n"
28053"uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);\n"
28054"uint2 __ovld __cnfn convert_uint2_rtp(ulong2);\n"
28055"uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);\n"
28056"uint2 __ovld __cnfn convert_uint2_rtn(ulong2);\n"
28057"uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);\n"
28058"uint2 __ovld __cnfn convert_uint2(ulong2);\n"
28059"uint2 __ovld __cnfn convert_uint2_sat(ulong2);\n"
28060"uint2 __ovld __cnfn convert_uint2_rte(float2);\n"
28061"uint2 __ovld __cnfn convert_uint2_sat_rte(float2);\n"
28062"uint2 __ovld __cnfn convert_uint2_rtz(float2);\n"
28063"uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);\n"
28064"uint2 __ovld __cnfn convert_uint2_rtp(float2);\n"
28065"uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);\n"
28066"uint2 __ovld __cnfn convert_uint2_rtn(float2);\n"
28067"uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);\n"
28068"uint2 __ovld __cnfn convert_uint2(float2);\n"
28069"uint2 __ovld __cnfn convert_uint2_sat(float2);\n"
28070"long2 __ovld __cnfn convert_long2_rte(char2);\n"
28071"long2 __ovld __cnfn convert_long2_sat_rte(char2);\n"
28072"long2 __ovld __cnfn convert_long2_rtz(char2);\n"
28073"long2 __ovld __cnfn convert_long2_sat_rtz(char2);\n"
28074"long2 __ovld __cnfn convert_long2_rtp(char2);\n"
28075"long2 __ovld __cnfn convert_long2_sat_rtp(char2);\n"
28076"long2 __ovld __cnfn convert_long2_rtn(char2);\n"
28077"long2 __ovld __cnfn convert_long2_sat_rtn(char2);\n"
28078"long2 __ovld __cnfn convert_long2(char2);\n"
28079"long2 __ovld __cnfn convert_long2_sat(char2);\n"
28080"long2 __ovld __cnfn convert_long2_rte(uchar2);\n"
28081"long2 __ovld __cnfn convert_long2_sat_rte(uchar2);\n"
28082"long2 __ovld __cnfn convert_long2_rtz(uchar2);\n"
28083"long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);\n"
28084"long2 __ovld __cnfn convert_long2_rtp(uchar2);\n"
28085"long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);\n"
28086"long2 __ovld __cnfn convert_long2_rtn(uchar2);\n"
28087"long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);\n"
28088"long2 __ovld __cnfn convert_long2(uchar2);\n"
28089"long2 __ovld __cnfn convert_long2_sat(uchar2);\n"
28090"long2 __ovld __cnfn convert_long2_rte(short2);\n"
28091"long2 __ovld __cnfn convert_long2_sat_rte(short2);\n"
28092"long2 __ovld __cnfn convert_long2_rtz(short2);\n"
28093"long2 __ovld __cnfn convert_long2_sat_rtz(short2);\n"
28094"long2 __ovld __cnfn convert_long2_rtp(short2);\n"
28095"long2 __ovld __cnfn convert_long2_sat_rtp(short2);\n"
28096"long2 __ovld __cnfn convert_long2_rtn(short2);\n"
28097"long2 __ovld __cnfn convert_long2_sat_rtn(short2);\n"
28098"long2 __ovld __cnfn convert_long2(short2);\n"
28099"long2 __ovld __cnfn convert_long2_sat(short2);\n"
28100"long2 __ovld __cnfn convert_long2_rte(ushort2);\n"
28101"long2 __ovld __cnfn convert_long2_sat_rte(ushort2);\n"
28102"long2 __ovld __cnfn convert_long2_rtz(ushort2);\n"
28103"long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);\n"
28104"long2 __ovld __cnfn convert_long2_rtp(ushort2);\n"
28105"long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);\n"
28106"long2 __ovld __cnfn convert_long2_rtn(ushort2);\n"
28107"long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);\n"
28108"long2 __ovld __cnfn convert_long2(ushort2);\n"
28109"long2 __ovld __cnfn convert_long2_sat(ushort2);\n"
28110"long2 __ovld __cnfn convert_long2_rte(int2);\n"
28111"long2 __ovld __cnfn convert_long2_sat_rte(int2);\n"
28112"long2 __ovld __cnfn convert_long2_rtz(int2);\n"
28113"long2 __ovld __cnfn convert_long2_sat_rtz(int2);\n"
28114"long2 __ovld __cnfn convert_long2_rtp(int2);\n"
28115"long2 __ovld __cnfn convert_long2_sat_rtp(int2);\n"
28116"long2 __ovld __cnfn convert_long2_rtn(int2);\n"
28117"long2 __ovld __cnfn convert_long2_sat_rtn(int2);\n"
28118"long2 __ovld __cnfn convert_long2(int2);\n"
28119"long2 __ovld __cnfn convert_long2_sat(int2);\n"
28120"long2 __ovld __cnfn convert_long2_rte(uint2);\n"
28121"long2 __ovld __cnfn convert_long2_sat_rte(uint2);\n"
28122"long2 __ovld __cnfn convert_long2_rtz(uint2);\n"
28123"long2 __ovld __cnfn convert_long2_sat_rtz(uint2);\n"
28124"long2 __ovld __cnfn convert_long2_rtp(uint2);\n"
28125"long2 __ovld __cnfn convert_long2_sat_rtp(uint2);\n"
28126"long2 __ovld __cnfn convert_long2_rtn(uint2);\n"
28127"long2 __ovld __cnfn convert_long2_sat_rtn(uint2);\n"
28128"long2 __ovld __cnfn convert_long2(uint2);\n"
28129"long2 __ovld __cnfn convert_long2_sat(uint2);\n"
28130"long2 __ovld __cnfn convert_long2_rte(long2);\n"
28131"long2 __ovld __cnfn convert_long2_sat_rte(long2);\n"
28132"long2 __ovld __cnfn convert_long2_rtz(long2);\n"
28133"long2 __ovld __cnfn convert_long2_sat_rtz(long2);\n"
28134"long2 __ovld __cnfn convert_long2_rtp(long2);\n"
28135"long2 __ovld __cnfn convert_long2_sat_rtp(long2);\n"
28136"long2 __ovld __cnfn convert_long2_rtn(long2);\n"
28137"long2 __ovld __cnfn convert_long2_sat_rtn(long2);\n"
28138"long2 __ovld __cnfn convert_long2(long2);\n"
28139"long2 __ovld __cnfn convert_long2_sat(long2);\n"
28140"long2 __ovld __cnfn convert_long2_rte(ulong2);\n"
28141"long2 __ovld __cnfn convert_long2_sat_rte(ulong2);\n"
28142"long2 __ovld __cnfn convert_long2_rtz(ulong2);\n"
28143"long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);\n"
28144"long2 __ovld __cnfn convert_long2_rtp(ulong2);\n"
28145"long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);\n"
28146"long2 __ovld __cnfn convert_long2_rtn(ulong2);\n"
28147"long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);\n"
28148"long2 __ovld __cnfn convert_long2(ulong2);\n"
28149"long2 __ovld __cnfn convert_long2_sat(ulong2);\n"
28150"long2 __ovld __cnfn convert_long2_rte(float2);\n"
28151"long2 __ovld __cnfn convert_long2_sat_rte(float2);\n"
28152"long2 __ovld __cnfn convert_long2_rtz(float2);\n"
28153"long2 __ovld __cnfn convert_long2_sat_rtz(float2);\n"
28154"long2 __ovld __cnfn convert_long2_rtp(float2);\n"
28155"long2 __ovld __cnfn convert_long2_sat_rtp(float2);\n"
28156"long2 __ovld __cnfn convert_long2_rtn(float2);\n"
28157"long2 __ovld __cnfn convert_long2_sat_rtn(float2);\n"
28158"long2 __ovld __cnfn convert_long2(float2);\n"
28159"long2 __ovld __cnfn convert_long2_sat(float2);\n"
28160"ulong2 __ovld __cnfn convert_ulong2_rte(char2);\n"
28161"ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);\n"
28162"ulong2 __ovld __cnfn convert_ulong2_rtz(char2);\n"
28163"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);\n"
28164"ulong2 __ovld __cnfn convert_ulong2_rtp(char2);\n"
28165"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);\n"
28166"ulong2 __ovld __cnfn convert_ulong2_rtn(char2);\n"
28167"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);\n"
28168"ulong2 __ovld __cnfn convert_ulong2(char2);\n"
28169"ulong2 __ovld __cnfn convert_ulong2_sat(char2);\n"
28170"ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);\n"
28171"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);\n"
28172"ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);\n"
28173"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);\n"
28174"ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);\n"
28175"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);\n"
28176"ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);\n"
28177"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);\n"
28178"ulong2 __ovld __cnfn convert_ulong2(uchar2);\n"
28179"ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);\n"
28180"ulong2 __ovld __cnfn convert_ulong2_rte(short2);\n"
28181"ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);\n"
28182"ulong2 __ovld __cnfn convert_ulong2_rtz(short2);\n"
28183"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);\n"
28184"ulong2 __ovld __cnfn convert_ulong2_rtp(short2);\n"
28185"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);\n"
28186"ulong2 __ovld __cnfn convert_ulong2_rtn(short2);\n"
28187"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);\n"
28188"ulong2 __ovld __cnfn convert_ulong2(short2);\n"
28189"ulong2 __ovld __cnfn convert_ulong2_sat(short2);\n"
28190"ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);\n"
28191"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);\n"
28192"ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);\n"
28193"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);\n"
28194"ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);\n"
28195"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);\n"
28196"ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);\n"
28197"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);\n"
28198"ulong2 __ovld __cnfn convert_ulong2(ushort2);\n"
28199"ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);\n"
28200"ulong2 __ovld __cnfn convert_ulong2_rte(int2);\n"
28201"ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);\n"
28202"ulong2 __ovld __cnfn convert_ulong2_rtz(int2);\n"
28203"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);\n"
28204"ulong2 __ovld __cnfn convert_ulong2_rtp(int2);\n"
28205"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);\n"
28206"ulong2 __ovld __cnfn convert_ulong2_rtn(int2);\n"
28207"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);\n"
28208"ulong2 __ovld __cnfn convert_ulong2(int2);\n"
28209"ulong2 __ovld __cnfn convert_ulong2_sat(int2);\n"
28210"ulong2 __ovld __cnfn convert_ulong2_rte(uint2);\n"
28211"ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);\n"
28212"ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);\n"
28213"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);\n"
28214"ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);\n"
28215"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);\n"
28216"ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);\n"
28217"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);\n"
28218"ulong2 __ovld __cnfn convert_ulong2(uint2);\n"
28219"ulong2 __ovld __cnfn convert_ulong2_sat(uint2);\n"
28220"ulong2 __ovld __cnfn convert_ulong2_rte(long2);\n"
28221"ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);\n"
28222"ulong2 __ovld __cnfn convert_ulong2_rtz(long2);\n"
28223"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);\n"
28224"ulong2 __ovld __cnfn convert_ulong2_rtp(long2);\n"
28225"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);\n"
28226"ulong2 __ovld __cnfn convert_ulong2_rtn(long2);\n"
28227"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);\n"
28228"ulong2 __ovld __cnfn convert_ulong2(long2);\n"
28229"ulong2 __ovld __cnfn convert_ulong2_sat(long2);\n"
28230"ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);\n"
28231"ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);\n"
28232"ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);\n"
28233"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);\n"
28234"ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);\n"
28235"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);\n"
28236"ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);\n"
28237"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);\n"
28238"ulong2 __ovld __cnfn convert_ulong2(ulong2);\n"
28239"ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);\n"
28240"ulong2 __ovld __cnfn convert_ulong2_rte(float2);\n"
28241"ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);\n"
28242"ulong2 __ovld __cnfn convert_ulong2_rtz(float2);\n"
28243"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);\n"
28244"ulong2 __ovld __cnfn convert_ulong2_rtp(float2);\n"
28245"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);\n"
28246"ulong2 __ovld __cnfn convert_ulong2_rtn(float2);\n"
28247"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);\n"
28248"ulong2 __ovld __cnfn convert_ulong2(float2);\n"
28249"ulong2 __ovld __cnfn convert_ulong2_sat(float2);\n"
28250"float2 __ovld __cnfn convert_float2_rte(char2);\n"
28251"float2 __ovld __cnfn convert_float2_rtz(char2);\n"
28252"float2 __ovld __cnfn convert_float2_rtp(char2);\n"
28253"float2 __ovld __cnfn convert_float2_rtn(char2);\n"
28254"float2 __ovld __cnfn convert_float2(char2);\n"
28255"float2 __ovld __cnfn convert_float2_rte(uchar2);\n"
28256"float2 __ovld __cnfn convert_float2_rtz(uchar2);\n"
28257"float2 __ovld __cnfn convert_float2_rtp(uchar2);\n"
28258"float2 __ovld __cnfn convert_float2_rtn(uchar2);\n"
28259"float2 __ovld __cnfn convert_float2(uchar2);\n"
28260"float2 __ovld __cnfn convert_float2_rte(short2);\n"
28261"float2 __ovld __cnfn convert_float2_rtz(short2);\n"
28262"float2 __ovld __cnfn convert_float2_rtp(short2);\n"
28263"float2 __ovld __cnfn convert_float2_rtn(short2);\n"
28264"float2 __ovld __cnfn convert_float2(short2);\n"
28265"float2 __ovld __cnfn convert_float2_rte(ushort2);\n"
28266"float2 __ovld __cnfn convert_float2_rtz(ushort2);\n"
28267"float2 __ovld __cnfn convert_float2_rtp(ushort2);\n"
28268"float2 __ovld __cnfn convert_float2_rtn(ushort2);\n"
28269"float2 __ovld __cnfn convert_float2(ushort2);\n"
28270"float2 __ovld __cnfn convert_float2_rte(int2);\n"
28271"float2 __ovld __cnfn convert_float2_rtz(int2);\n"
28272"float2 __ovld __cnfn convert_float2_rtp(int2);\n"
28273"float2 __ovld __cnfn convert_float2_rtn(int2);\n"
28274"float2 __ovld __cnfn convert_float2(int2);\n"
28275"float2 __ovld __cnfn convert_float2_rte(uint2);\n"
28276"float2 __ovld __cnfn convert_float2_rtz(uint2);\n"
28277"float2 __ovld __cnfn convert_float2_rtp(uint2);\n"
28278"float2 __ovld __cnfn convert_float2_rtn(uint2);\n"
28279"float2 __ovld __cnfn convert_float2(uint2);\n"
28280"float2 __ovld __cnfn convert_float2_rte(long2);\n"
28281"float2 __ovld __cnfn convert_float2_rtz(long2);\n"
28282"float2 __ovld __cnfn convert_float2_rtp(long2);\n"
28283"float2 __ovld __cnfn convert_float2_rtn(long2);\n"
28284"float2 __ovld __cnfn convert_float2(long2);\n"
28285"float2 __ovld __cnfn convert_float2_rte(ulong2);\n"
28286"float2 __ovld __cnfn convert_float2_rtz(ulong2);\n"
28287"float2 __ovld __cnfn convert_float2_rtp(ulong2);\n"
28288"float2 __ovld __cnfn convert_float2_rtn(ulong2);\n"
28289"float2 __ovld __cnfn convert_float2(ulong2);\n"
28290"float2 __ovld __cnfn convert_float2_rte(float2);\n"
28291"float2 __ovld __cnfn convert_float2_rtz(float2);\n"
28292"float2 __ovld __cnfn convert_float2_rtp(float2);\n"
28293"float2 __ovld __cnfn convert_float2_rtn(float2);\n"
28294"float2 __ovld __cnfn convert_float2(float2);\n"
28295"char3 __ovld __cnfn convert_char3_rte(char3);\n"
28296"char3 __ovld __cnfn convert_char3_sat_rte(char3);\n"
28297"char3 __ovld __cnfn convert_char3_rtz(char3);\n"
28298"char3 __ovld __cnfn convert_char3_sat_rtz(char3);\n"
28299"char3 __ovld __cnfn convert_char3_rtp(char3);\n"
28300"char3 __ovld __cnfn convert_char3_sat_rtp(char3);\n"
28301"char3 __ovld __cnfn convert_char3_rtn(char3);\n"
28302"char3 __ovld __cnfn convert_char3_sat_rtn(char3);\n"
28303"char3 __ovld __cnfn convert_char3(char3);\n"
28304"char3 __ovld __cnfn convert_char3_sat(char3);\n"
28305"char3 __ovld __cnfn convert_char3_rte(uchar3);\n"
28306"char3 __ovld __cnfn convert_char3_sat_rte(uchar3);\n"
28307"char3 __ovld __cnfn convert_char3_rtz(uchar3);\n"
28308"char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);\n"
28309"char3 __ovld __cnfn convert_char3_rtp(uchar3);\n"
28310"char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);\n"
28311"char3 __ovld __cnfn convert_char3_rtn(uchar3);\n"
28312"char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);\n"
28313"char3 __ovld __cnfn convert_char3(uchar3);\n"
28314"char3 __ovld __cnfn convert_char3_sat(uchar3);\n"
28315"char3 __ovld __cnfn convert_char3_rte(short3);\n"
28316"char3 __ovld __cnfn convert_char3_sat_rte(short3);\n"
28317"char3 __ovld __cnfn convert_char3_rtz(short3);\n"
28318"char3 __ovld __cnfn convert_char3_sat_rtz(short3);\n"
28319"char3 __ovld __cnfn convert_char3_rtp(short3);\n"
28320"char3 __ovld __cnfn convert_char3_sat_rtp(short3);\n"
28321"char3 __ovld __cnfn convert_char3_rtn(short3);\n"
28322"char3 __ovld __cnfn convert_char3_sat_rtn(short3);\n"
28323"char3 __ovld __cnfn convert_char3(short3);\n"
28324"char3 __ovld __cnfn convert_char3_sat(short3);\n"
28325"char3 __ovld __cnfn convert_char3_rte(ushort3);\n"
28326"char3 __ovld __cnfn convert_char3_sat_rte(ushort3);\n"
28327"char3 __ovld __cnfn convert_char3_rtz(ushort3);\n"
28328"char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);\n"
28329"char3 __ovld __cnfn convert_char3_rtp(ushort3);\n"
28330"char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);\n"
28331"char3 __ovld __cnfn convert_char3_rtn(ushort3);\n"
28332"char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);\n"
28333"char3 __ovld __cnfn convert_char3(ushort3);\n"
28334"char3 __ovld __cnfn convert_char3_sat(ushort3);\n"
28335"char3 __ovld __cnfn convert_char3_rte(int3);\n"
28336"char3 __ovld __cnfn convert_char3_sat_rte(int3);\n"
28337"char3 __ovld __cnfn convert_char3_rtz(int3);\n"
28338"char3 __ovld __cnfn convert_char3_sat_rtz(int3);\n"
28339"char3 __ovld __cnfn convert_char3_rtp(int3);\n"
28340"char3 __ovld __cnfn convert_char3_sat_rtp(int3);\n"
28341"char3 __ovld __cnfn convert_char3_rtn(int3);\n"
28342"char3 __ovld __cnfn convert_char3_sat_rtn(int3);\n"
28343"char3 __ovld __cnfn convert_char3(int3);\n"
28344"char3 __ovld __cnfn convert_char3_sat(int3);\n"
28345"char3 __ovld __cnfn convert_char3_rte(uint3);\n"
28346"char3 __ovld __cnfn convert_char3_sat_rte(uint3);\n"
28347"char3 __ovld __cnfn convert_char3_rtz(uint3);\n"
28348"char3 __ovld __cnfn convert_char3_sat_rtz(uint3);\n"
28349"char3 __ovld __cnfn convert_char3_rtp(uint3);\n"
28350"char3 __ovld __cnfn convert_char3_sat_rtp(uint3);\n"
28351"char3 __ovld __cnfn convert_char3_rtn(uint3);\n"
28352"char3 __ovld __cnfn convert_char3_sat_rtn(uint3);\n"
28353"char3 __ovld __cnfn convert_char3(uint3);\n"
28354"char3 __ovld __cnfn convert_char3_sat(uint3);\n"
28355"char3 __ovld __cnfn convert_char3_rte(long3);\n"
28356"char3 __ovld __cnfn convert_char3_sat_rte(long3);\n"
28357"char3 __ovld __cnfn convert_char3_rtz(long3);\n"
28358"char3 __ovld __cnfn convert_char3_sat_rtz(long3);\n"
28359"char3 __ovld __cnfn convert_char3_rtp(long3);\n"
28360"char3 __ovld __cnfn convert_char3_sat_rtp(long3);\n"
28361"char3 __ovld __cnfn convert_char3_rtn(long3);\n"
28362"char3 __ovld __cnfn convert_char3_sat_rtn(long3);\n"
28363"char3 __ovld __cnfn convert_char3(long3);\n"
28364"char3 __ovld __cnfn convert_char3_sat(long3);\n"
28365"char3 __ovld __cnfn convert_char3_rte(ulong3);\n"
28366"char3 __ovld __cnfn convert_char3_sat_rte(ulong3);\n"
28367"char3 __ovld __cnfn convert_char3_rtz(ulong3);\n"
28368"char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);\n"
28369"char3 __ovld __cnfn convert_char3_rtp(ulong3);\n"
28370"char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);\n"
28371"char3 __ovld __cnfn convert_char3_rtn(ulong3);\n"
28372"char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);\n"
28373"char3 __ovld __cnfn convert_char3(ulong3);\n"
28374"char3 __ovld __cnfn convert_char3_sat(ulong3);\n"
28375"char3 __ovld __cnfn convert_char3_rte(float3);\n"
28376"char3 __ovld __cnfn convert_char3_sat_rte(float3);\n"
28377"char3 __ovld __cnfn convert_char3_rtz(float3);\n"
28378"char3 __ovld __cnfn convert_char3_sat_rtz(float3);\n"
28379"char3 __ovld __cnfn convert_char3_rtp(float3);\n"
28380"char3 __ovld __cnfn convert_char3_sat_rtp(float3);\n"
28381"char3 __ovld __cnfn convert_char3_rtn(float3);\n"
28382"char3 __ovld __cnfn convert_char3_sat_rtn(float3);\n"
28383"char3 __ovld __cnfn convert_char3(float3);\n"
28384"char3 __ovld __cnfn convert_char3_sat(float3);\n"
28385"uchar3 __ovld __cnfn convert_uchar3_rte(char3);\n"
28386"uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);\n"
28387"uchar3 __ovld __cnfn convert_uchar3_rtz(char3);\n"
28388"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);\n"
28389"uchar3 __ovld __cnfn convert_uchar3_rtp(char3);\n"
28390"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);\n"
28391"uchar3 __ovld __cnfn convert_uchar3_rtn(char3);\n"
28392"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);\n"
28393"uchar3 __ovld __cnfn convert_uchar3(char3);\n"
28394"uchar3 __ovld __cnfn convert_uchar3_sat(char3);\n"
28395"uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);\n"
28396"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);\n"
28397"uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);\n"
28398"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);\n"
28399"uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);\n"
28400"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);\n"
28401"uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);\n"
28402"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);\n"
28403"uchar3 __ovld __cnfn convert_uchar3(uchar3);\n"
28404"uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);\n"
28405"uchar3 __ovld __cnfn convert_uchar3_rte(short3);\n"
28406"uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);\n"
28407"uchar3 __ovld __cnfn convert_uchar3_rtz(short3);\n"
28408"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);\n"
28409"uchar3 __ovld __cnfn convert_uchar3_rtp(short3);\n"
28410"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);\n"
28411"uchar3 __ovld __cnfn convert_uchar3_rtn(short3);\n"
28412"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);\n"
28413"uchar3 __ovld __cnfn convert_uchar3(short3);\n"
28414"uchar3 __ovld __cnfn convert_uchar3_sat(short3);\n"
28415"uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);\n"
28416"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);\n"
28417"uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);\n"
28418"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);\n"
28419"uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);\n"
28420"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);\n"
28421"uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);\n"
28422"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);\n"
28423"uchar3 __ovld __cnfn convert_uchar3(ushort3);\n"
28424"uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);\n"
28425"uchar3 __ovld __cnfn convert_uchar3_rte(int3);\n"
28426"uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);\n"
28427"uchar3 __ovld __cnfn convert_uchar3_rtz(int3);\n"
28428"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);\n"
28429"uchar3 __ovld __cnfn convert_uchar3_rtp(int3);\n"
28430"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);\n"
28431"uchar3 __ovld __cnfn convert_uchar3_rtn(int3);\n"
28432"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);\n"
28433"uchar3 __ovld __cnfn convert_uchar3(int3);\n"
28434"uchar3 __ovld __cnfn convert_uchar3_sat(int3);\n"
28435"uchar3 __ovld __cnfn convert_uchar3_rte(uint3);\n"
28436"uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);\n"
28437"uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);\n"
28438"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);\n"
28439"uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);\n"
28440"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);\n"
28441"uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);\n"
28442"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);\n"
28443"uchar3 __ovld __cnfn convert_uchar3(uint3);\n"
28444"uchar3 __ovld __cnfn convert_uchar3_sat(uint3);\n"
28445"uchar3 __ovld __cnfn convert_uchar3_rte(long3);\n"
28446"uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);\n"
28447"uchar3 __ovld __cnfn convert_uchar3_rtz(long3);\n"
28448"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);\n"
28449"uchar3 __ovld __cnfn convert_uchar3_rtp(long3);\n"
28450"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);\n"
28451"uchar3 __ovld __cnfn convert_uchar3_rtn(long3);\n"
28452"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);\n"
28453"uchar3 __ovld __cnfn convert_uchar3(long3);\n"
28454"uchar3 __ovld __cnfn convert_uchar3_sat(long3);\n"
28455"uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);\n"
28456"uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);\n"
28457"uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);\n"
28458"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);\n"
28459"uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);\n"
28460"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);\n"
28461"uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);\n"
28462"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);\n"
28463"uchar3 __ovld __cnfn convert_uchar3(ulong3);\n"
28464"uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);\n"
28465"uchar3 __ovld __cnfn convert_uchar3_rte(float3);\n"
28466"uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);\n"
28467"uchar3 __ovld __cnfn convert_uchar3_rtz(float3);\n"
28468"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);\n"
28469"uchar3 __ovld __cnfn convert_uchar3_rtp(float3);\n"
28470"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);\n"
28471"uchar3 __ovld __cnfn convert_uchar3_rtn(float3);\n"
28472"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);\n"
28473"uchar3 __ovld __cnfn convert_uchar3(float3);\n"
28474"uchar3 __ovld __cnfn convert_uchar3_sat(float3);\n"
28475"short3 __ovld __cnfn convert_short3_rte(char3);\n"
28476"short3 __ovld __cnfn convert_short3_sat_rte(char3);\n"
28477"short3 __ovld __cnfn convert_short3_rtz(char3);\n"
28478"short3 __ovld __cnfn convert_short3_sat_rtz(char3);\n"
28479"short3 __ovld __cnfn convert_short3_rtp(char3);\n"
28480"short3 __ovld __cnfn convert_short3_sat_rtp(char3);\n"
28481"short3 __ovld __cnfn convert_short3_rtn(char3);\n"
28482"short3 __ovld __cnfn convert_short3_sat_rtn(char3);\n"
28483"short3 __ovld __cnfn convert_short3(char3);\n"
28484"short3 __ovld __cnfn convert_short3_sat(char3);\n"
28485"short3 __ovld __cnfn convert_short3_rte(uchar3);\n"
28486"short3 __ovld __cnfn convert_short3_sat_rte(uchar3);\n"
28487"short3 __ovld __cnfn convert_short3_rtz(uchar3);\n"
28488"short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);\n"
28489"short3 __ovld __cnfn convert_short3_rtp(uchar3);\n"
28490"short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);\n"
28491"short3 __ovld __cnfn convert_short3_rtn(uchar3);\n"
28492"short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);\n"
28493"short3 __ovld __cnfn convert_short3(uchar3);\n"
28494"short3 __ovld __cnfn convert_short3_sat(uchar3);\n"
28495"short3 __ovld __cnfn convert_short3_rte(short3);\n"
28496"short3 __ovld __cnfn convert_short3_sat_rte(short3);\n"
28497"short3 __ovld __cnfn convert_short3_rtz(short3);\n"
28498"short3 __ovld __cnfn convert_short3_sat_rtz(short3);\n"
28499"short3 __ovld __cnfn convert_short3_rtp(short3);\n"
28500"short3 __ovld __cnfn convert_short3_sat_rtp(short3);\n"
28501"short3 __ovld __cnfn convert_short3_rtn(short3);\n"
28502"short3 __ovld __cnfn convert_short3_sat_rtn(short3);\n"
28503"short3 __ovld __cnfn convert_short3(short3);\n"
28504"short3 __ovld __cnfn convert_short3_sat(short3);\n"
28505"short3 __ovld __cnfn convert_short3_rte(ushort3);\n"
28506"short3 __ovld __cnfn convert_short3_sat_rte(ushort3);\n"
28507"short3 __ovld __cnfn convert_short3_rtz(ushort3);\n"
28508"short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);\n"
28509"short3 __ovld __cnfn convert_short3_rtp(ushort3);\n"
28510"short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);\n"
28511"short3 __ovld __cnfn convert_short3_rtn(ushort3);\n"
28512"short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);\n"
28513"short3 __ovld __cnfn convert_short3(ushort3);\n"
28514"short3 __ovld __cnfn convert_short3_sat(ushort3);\n"
28515"short3 __ovld __cnfn convert_short3_rte(int3);\n"
28516"short3 __ovld __cnfn convert_short3_sat_rte(int3);\n"
28517"short3 __ovld __cnfn convert_short3_rtz(int3);\n"
28518"short3 __ovld __cnfn convert_short3_sat_rtz(int3);\n"
28519"short3 __ovld __cnfn convert_short3_rtp(int3);\n"
28520"short3 __ovld __cnfn convert_short3_sat_rtp(int3);\n"
28521"short3 __ovld __cnfn convert_short3_rtn(int3);\n"
28522"short3 __ovld __cnfn convert_short3_sat_rtn(int3);\n"
28523"short3 __ovld __cnfn convert_short3(int3);\n"
28524"short3 __ovld __cnfn convert_short3_sat(int3);\n"
28525"short3 __ovld __cnfn convert_short3_rte(uint3);\n"
28526"short3 __ovld __cnfn convert_short3_sat_rte(uint3);\n"
28527"short3 __ovld __cnfn convert_short3_rtz(uint3);\n"
28528"short3 __ovld __cnfn convert_short3_sat_rtz(uint3);\n"
28529"short3 __ovld __cnfn convert_short3_rtp(uint3);\n"
28530"short3 __ovld __cnfn convert_short3_sat_rtp(uint3);\n"
28531"short3 __ovld __cnfn convert_short3_rtn(uint3);\n"
28532"short3 __ovld __cnfn convert_short3_sat_rtn(uint3);\n"
28533"short3 __ovld __cnfn convert_short3(uint3);\n"
28534"short3 __ovld __cnfn convert_short3_sat(uint3);\n"
28535"short3 __ovld __cnfn convert_short3_rte(long3);\n"
28536"short3 __ovld __cnfn convert_short3_sat_rte(long3);\n"
28537"short3 __ovld __cnfn convert_short3_rtz(long3);\n"
28538"short3 __ovld __cnfn convert_short3_sat_rtz(long3);\n"
28539"short3 __ovld __cnfn convert_short3_rtp(long3);\n"
28540"short3 __ovld __cnfn convert_short3_sat_rtp(long3);\n"
28541"short3 __ovld __cnfn convert_short3_rtn(long3);\n"
28542"short3 __ovld __cnfn convert_short3_sat_rtn(long3);\n"
28543"short3 __ovld __cnfn convert_short3(long3);\n"
28544"short3 __ovld __cnfn convert_short3_sat(long3);\n"
28545"short3 __ovld __cnfn convert_short3_rte(ulong3);\n"
28546"short3 __ovld __cnfn convert_short3_sat_rte(ulong3);\n"
28547"short3 __ovld __cnfn convert_short3_rtz(ulong3);\n"
28548"short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);\n"
28549"short3 __ovld __cnfn convert_short3_rtp(ulong3);\n"
28550"short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);\n"
28551"short3 __ovld __cnfn convert_short3_rtn(ulong3);\n"
28552"short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);\n"
28553"short3 __ovld __cnfn convert_short3(ulong3);\n"
28554"short3 __ovld __cnfn convert_short3_sat(ulong3);\n"
28555"short3 __ovld __cnfn convert_short3_rte(float3);\n"
28556"short3 __ovld __cnfn convert_short3_sat_rte(float3);\n"
28557"short3 __ovld __cnfn convert_short3_rtz(float3);\n"
28558"short3 __ovld __cnfn convert_short3_sat_rtz(float3);\n"
28559"short3 __ovld __cnfn convert_short3_rtp(float3);\n"
28560"short3 __ovld __cnfn convert_short3_sat_rtp(float3);\n"
28561"short3 __ovld __cnfn convert_short3_rtn(float3);\n"
28562"short3 __ovld __cnfn convert_short3_sat_rtn(float3);\n"
28563"short3 __ovld __cnfn convert_short3(float3);\n"
28564"short3 __ovld __cnfn convert_short3_sat(float3);\n"
28565"ushort3 __ovld __cnfn convert_ushort3_rte(char3);\n"
28566"ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);\n"
28567"ushort3 __ovld __cnfn convert_ushort3_rtz(char3);\n"
28568"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);\n"
28569"ushort3 __ovld __cnfn convert_ushort3_rtp(char3);\n"
28570"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);\n"
28571"ushort3 __ovld __cnfn convert_ushort3_rtn(char3);\n"
28572"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);\n"
28573"ushort3 __ovld __cnfn convert_ushort3(char3);\n"
28574"ushort3 __ovld __cnfn convert_ushort3_sat(char3);\n"
28575"ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);\n"
28576"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);\n"
28577"ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);\n"
28578"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);\n"
28579"ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);\n"
28580"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);\n"
28581"ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);\n"
28582"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);\n"
28583"ushort3 __ovld __cnfn convert_ushort3(uchar3);\n"
28584"ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);\n"
28585"ushort3 __ovld __cnfn convert_ushort3_rte(short3);\n"
28586"ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);\n"
28587"ushort3 __ovld __cnfn convert_ushort3_rtz(short3);\n"
28588"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);\n"
28589"ushort3 __ovld __cnfn convert_ushort3_rtp(short3);\n"
28590"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);\n"
28591"ushort3 __ovld __cnfn convert_ushort3_rtn(short3);\n"
28592"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);\n"
28593"ushort3 __ovld __cnfn convert_ushort3(short3);\n"
28594"ushort3 __ovld __cnfn convert_ushort3_sat(short3);\n"
28595"ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);\n"
28596"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);\n"
28597"ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);\n"
28598"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);\n"
28599"ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);\n"
28600"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);\n"
28601"ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);\n"
28602"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);\n"
28603"ushort3 __ovld __cnfn convert_ushort3(ushort3);\n"
28604"ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);\n"
28605"ushort3 __ovld __cnfn convert_ushort3_rte(int3);\n"
28606"ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);\n"
28607"ushort3 __ovld __cnfn convert_ushort3_rtz(int3);\n"
28608"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);\n"
28609"ushort3 __ovld __cnfn convert_ushort3_rtp(int3);\n"
28610"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);\n"
28611"ushort3 __ovld __cnfn convert_ushort3_rtn(int3);\n"
28612"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);\n"
28613"ushort3 __ovld __cnfn convert_ushort3(int3);\n"
28614"ushort3 __ovld __cnfn convert_ushort3_sat(int3);\n"
28615"ushort3 __ovld __cnfn convert_ushort3_rte(uint3);\n"
28616"ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);\n"
28617"ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);\n"
28618"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);\n"
28619"ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);\n"
28620"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);\n"
28621"ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);\n"
28622"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);\n"
28623"ushort3 __ovld __cnfn convert_ushort3(uint3);\n"
28624"ushort3 __ovld __cnfn convert_ushort3_sat(uint3);\n"
28625"ushort3 __ovld __cnfn convert_ushort3_rte(long3);\n"
28626"ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);\n"
28627"ushort3 __ovld __cnfn convert_ushort3_rtz(long3);\n"
28628"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);\n"
28629"ushort3 __ovld __cnfn convert_ushort3_rtp(long3);\n"
28630"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);\n"
28631"ushort3 __ovld __cnfn convert_ushort3_rtn(long3);\n"
28632"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);\n"
28633"ushort3 __ovld __cnfn convert_ushort3(long3);\n"
28634"ushort3 __ovld __cnfn convert_ushort3_sat(long3);\n"
28635"ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);\n"
28636"ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);\n"
28637"ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);\n"
28638"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);\n"
28639"ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);\n"
28640"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);\n"
28641"ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);\n"
28642"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);\n"
28643"ushort3 __ovld __cnfn convert_ushort3(ulong3);\n"
28644"ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);\n"
28645"ushort3 __ovld __cnfn convert_ushort3_rte(float3);\n"
28646"ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);\n"
28647"ushort3 __ovld __cnfn convert_ushort3_rtz(float3);\n"
28648"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);\n"
28649"ushort3 __ovld __cnfn convert_ushort3_rtp(float3);\n"
28650"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);\n"
28651"ushort3 __ovld __cnfn convert_ushort3_rtn(float3);\n"
28652"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);\n"
28653"ushort3 __ovld __cnfn convert_ushort3(float3);\n"
28654"ushort3 __ovld __cnfn convert_ushort3_sat(float3);\n"
28655"int3 __ovld __cnfn convert_int3_rte(char3);\n"
28656"int3 __ovld __cnfn convert_int3_sat_rte(char3);\n"
28657"int3 __ovld __cnfn convert_int3_rtz(char3);\n"
28658"int3 __ovld __cnfn convert_int3_sat_rtz(char3);\n"
28659"int3 __ovld __cnfn convert_int3_rtp(char3);\n"
28660"int3 __ovld __cnfn convert_int3_sat_rtp(char3);\n"
28661"int3 __ovld __cnfn convert_int3_rtn(char3);\n"
28662"int3 __ovld __cnfn convert_int3_sat_rtn(char3);\n"
28663"int3 __ovld __cnfn convert_int3(char3);\n"
28664"int3 __ovld __cnfn convert_int3_sat(char3);\n"
28665"int3 __ovld __cnfn convert_int3_rte(uchar3);\n"
28666"int3 __ovld __cnfn convert_int3_sat_rte(uchar3);\n"
28667"int3 __ovld __cnfn convert_int3_rtz(uchar3);\n"
28668"int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);\n"
28669"int3 __ovld __cnfn convert_int3_rtp(uchar3);\n"
28670"int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);\n"
28671"int3 __ovld __cnfn convert_int3_rtn(uchar3);\n"
28672"int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);\n"
28673"int3 __ovld __cnfn convert_int3(uchar3);\n"
28674"int3 __ovld __cnfn convert_int3_sat(uchar3);\n"
28675"int3 __ovld __cnfn convert_int3_rte(short3);\n"
28676"int3 __ovld __cnfn convert_int3_sat_rte(short3);\n"
28677"int3 __ovld __cnfn convert_int3_rtz(short3);\n"
28678"int3 __ovld __cnfn convert_int3_sat_rtz(short3);\n"
28679"int3 __ovld __cnfn convert_int3_rtp(short3);\n"
28680"int3 __ovld __cnfn convert_int3_sat_rtp(short3);\n"
28681"int3 __ovld __cnfn convert_int3_rtn(short3);\n"
28682"int3 __ovld __cnfn convert_int3_sat_rtn(short3);\n"
28683"int3 __ovld __cnfn convert_int3(short3);\n"
28684"int3 __ovld __cnfn convert_int3_sat(short3);\n"
28685"int3 __ovld __cnfn convert_int3_rte(ushort3);\n"
28686"int3 __ovld __cnfn convert_int3_sat_rte(ushort3);\n"
28687"int3 __ovld __cnfn convert_int3_rtz(ushort3);\n"
28688"int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);\n"
28689"int3 __ovld __cnfn convert_int3_rtp(ushort3);\n"
28690"int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);\n"
28691"int3 __ovld __cnfn convert_int3_rtn(ushort3);\n"
28692"int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);\n"
28693"int3 __ovld __cnfn convert_int3(ushort3);\n"
28694"int3 __ovld __cnfn convert_int3_sat(ushort3);\n"
28695"int3 __ovld __cnfn convert_int3_rte(int3);\n"
28696"int3 __ovld __cnfn convert_int3_sat_rte(int3);\n"
28697"int3 __ovld __cnfn convert_int3_rtz(int3);\n"
28698"int3 __ovld __cnfn convert_int3_sat_rtz(int3);\n"
28699"int3 __ovld __cnfn convert_int3_rtp(int3);\n"
28700"int3 __ovld __cnfn convert_int3_sat_rtp(int3);\n"
28701"int3 __ovld __cnfn convert_int3_rtn(int3);\n"
28702"int3 __ovld __cnfn convert_int3_sat_rtn(int3);\n"
28703"int3 __ovld __cnfn convert_int3(int3);\n"
28704"int3 __ovld __cnfn convert_int3_sat(int3);\n"
28705"int3 __ovld __cnfn convert_int3_rte(uint3);\n"
28706"int3 __ovld __cnfn convert_int3_sat_rte(uint3);\n"
28707"int3 __ovld __cnfn convert_int3_rtz(uint3);\n"
28708"int3 __ovld __cnfn convert_int3_sat_rtz(uint3);\n"
28709"int3 __ovld __cnfn convert_int3_rtp(uint3);\n"
28710"int3 __ovld __cnfn convert_int3_sat_rtp(uint3);\n"
28711"int3 __ovld __cnfn convert_int3_rtn(uint3);\n"
28712"int3 __ovld __cnfn convert_int3_sat_rtn(uint3);\n"
28713"int3 __ovld __cnfn convert_int3(uint3);\n"
28714"int3 __ovld __cnfn convert_int3_sat(uint3);\n"
28715"int3 __ovld __cnfn convert_int3_rte(long3);\n"
28716"int3 __ovld __cnfn convert_int3_sat_rte(long3);\n"
28717"int3 __ovld __cnfn convert_int3_rtz(long3);\n"
28718"int3 __ovld __cnfn convert_int3_sat_rtz(long3);\n"
28719"int3 __ovld __cnfn convert_int3_rtp(long3);\n"
28720"int3 __ovld __cnfn convert_int3_sat_rtp(long3);\n"
28721"int3 __ovld __cnfn convert_int3_rtn(long3);\n"
28722"int3 __ovld __cnfn convert_int3_sat_rtn(long3);\n"
28723"int3 __ovld __cnfn convert_int3(long3);\n"
28724"int3 __ovld __cnfn convert_int3_sat(long3);\n"
28725"int3 __ovld __cnfn convert_int3_rte(ulong3);\n"
28726"int3 __ovld __cnfn convert_int3_sat_rte(ulong3);\n"
28727"int3 __ovld __cnfn convert_int3_rtz(ulong3);\n"
28728"int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);\n"
28729"int3 __ovld __cnfn convert_int3_rtp(ulong3);\n"
28730"int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);\n"
28731"int3 __ovld __cnfn convert_int3_rtn(ulong3);\n"
28732"int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);\n"
28733"int3 __ovld __cnfn convert_int3(ulong3);\n"
28734"int3 __ovld __cnfn convert_int3_sat(ulong3);\n"
28735"int3 __ovld __cnfn convert_int3_rte(float3);\n"
28736"int3 __ovld __cnfn convert_int3_sat_rte(float3);\n"
28737"int3 __ovld __cnfn convert_int3_rtz(float3);\n"
28738"int3 __ovld __cnfn convert_int3_sat_rtz(float3);\n"
28739"int3 __ovld __cnfn convert_int3_rtp(float3);\n"
28740"int3 __ovld __cnfn convert_int3_sat_rtp(float3);\n"
28741"int3 __ovld __cnfn convert_int3_rtn(float3);\n"
28742"int3 __ovld __cnfn convert_int3_sat_rtn(float3);\n"
28743"int3 __ovld __cnfn convert_int3(float3);\n"
28744"int3 __ovld __cnfn convert_int3_sat(float3);\n"
28745"uint3 __ovld __cnfn convert_uint3_rte(char3);\n"
28746"uint3 __ovld __cnfn convert_uint3_sat_rte(char3);\n"
28747"uint3 __ovld __cnfn convert_uint3_rtz(char3);\n"
28748"uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);\n"
28749"uint3 __ovld __cnfn convert_uint3_rtp(char3);\n"
28750"uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);\n"
28751"uint3 __ovld __cnfn convert_uint3_rtn(char3);\n"
28752"uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);\n"
28753"uint3 __ovld __cnfn convert_uint3(char3);\n"
28754"uint3 __ovld __cnfn convert_uint3_sat(char3);\n"
28755"uint3 __ovld __cnfn convert_uint3_rte(uchar3);\n"
28756"uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);\n"
28757"uint3 __ovld __cnfn convert_uint3_rtz(uchar3);\n"
28758"uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);\n"
28759"uint3 __ovld __cnfn convert_uint3_rtp(uchar3);\n"
28760"uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);\n"
28761"uint3 __ovld __cnfn convert_uint3_rtn(uchar3);\n"
28762"uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);\n"
28763"uint3 __ovld __cnfn convert_uint3(uchar3);\n"
28764"uint3 __ovld __cnfn convert_uint3_sat(uchar3);\n"
28765"uint3 __ovld __cnfn convert_uint3_rte(short3);\n"
28766"uint3 __ovld __cnfn convert_uint3_sat_rte(short3);\n"
28767"uint3 __ovld __cnfn convert_uint3_rtz(short3);\n"
28768"uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);\n"
28769"uint3 __ovld __cnfn convert_uint3_rtp(short3);\n"
28770"uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);\n"
28771"uint3 __ovld __cnfn convert_uint3_rtn(short3);\n"
28772"uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);\n"
28773"uint3 __ovld __cnfn convert_uint3(short3);\n"
28774"uint3 __ovld __cnfn convert_uint3_sat(short3);\n"
28775"uint3 __ovld __cnfn convert_uint3_rte(ushort3);\n"
28776"uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);\n"
28777"uint3 __ovld __cnfn convert_uint3_rtz(ushort3);\n"
28778"uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);\n"
28779"uint3 __ovld __cnfn convert_uint3_rtp(ushort3);\n"
28780"uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);\n"
28781"uint3 __ovld __cnfn convert_uint3_rtn(ushort3);\n"
28782"uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);\n"
28783"uint3 __ovld __cnfn convert_uint3(ushort3);\n"
28784"uint3 __ovld __cnfn convert_uint3_sat(ushort3);\n"
28785"uint3 __ovld __cnfn convert_uint3_rte(int3);\n"
28786"uint3 __ovld __cnfn convert_uint3_sat_rte(int3);\n"
28787"uint3 __ovld __cnfn convert_uint3_rtz(int3);\n"
28788"uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);\n"
28789"uint3 __ovld __cnfn convert_uint3_rtp(int3);\n"
28790"uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);\n"
28791"uint3 __ovld __cnfn convert_uint3_rtn(int3);\n"
28792"uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);\n"
28793"uint3 __ovld __cnfn convert_uint3(int3);\n"
28794"uint3 __ovld __cnfn convert_uint3_sat(int3);\n"
28795"uint3 __ovld __cnfn convert_uint3_rte(uint3);\n"
28796"uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);\n"
28797"uint3 __ovld __cnfn convert_uint3_rtz(uint3);\n"
28798"uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);\n"
28799"uint3 __ovld __cnfn convert_uint3_rtp(uint3);\n"
28800"uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);\n"
28801"uint3 __ovld __cnfn convert_uint3_rtn(uint3);\n"
28802"uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);\n"
28803"uint3 __ovld __cnfn convert_uint3(uint3);\n"
28804"uint3 __ovld __cnfn convert_uint3_sat(uint3);\n"
28805"uint3 __ovld __cnfn convert_uint3_rte(long3);\n"
28806"uint3 __ovld __cnfn convert_uint3_sat_rte(long3);\n"
28807"uint3 __ovld __cnfn convert_uint3_rtz(long3);\n"
28808"uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);\n"
28809"uint3 __ovld __cnfn convert_uint3_rtp(long3);\n"
28810"uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);\n"
28811"uint3 __ovld __cnfn convert_uint3_rtn(long3);\n"
28812"uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);\n"
28813"uint3 __ovld __cnfn convert_uint3(long3);\n"
28814"uint3 __ovld __cnfn convert_uint3_sat(long3);\n"
28815"uint3 __ovld __cnfn convert_uint3_rte(ulong3);\n"
28816"uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);\n"
28817"uint3 __ovld __cnfn convert_uint3_rtz(ulong3);\n"
28818"uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);\n"
28819"uint3 __ovld __cnfn convert_uint3_rtp(ulong3);\n"
28820"uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);\n"
28821"uint3 __ovld __cnfn convert_uint3_rtn(ulong3);\n"
28822"uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);\n"
28823"uint3 __ovld __cnfn convert_uint3(ulong3);\n"
28824"uint3 __ovld __cnfn convert_uint3_sat(ulong3);\n"
28825"uint3 __ovld __cnfn convert_uint3_rte(float3);\n"
28826"uint3 __ovld __cnfn convert_uint3_sat_rte(float3);\n"
28827"uint3 __ovld __cnfn convert_uint3_rtz(float3);\n"
28828"uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);\n"
28829"uint3 __ovld __cnfn convert_uint3_rtp(float3);\n"
28830"uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);\n"
28831"uint3 __ovld __cnfn convert_uint3_rtn(float3);\n"
28832"uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);\n"
28833"uint3 __ovld __cnfn convert_uint3(float3);\n"
28834"uint3 __ovld __cnfn convert_uint3_sat(float3);\n"
28835"long3 __ovld __cnfn convert_long3_rte(char3);\n"
28836"long3 __ovld __cnfn convert_long3_sat_rte(char3);\n"
28837"long3 __ovld __cnfn convert_long3_rtz(char3);\n"
28838"long3 __ovld __cnfn convert_long3_sat_rtz(char3);\n"
28839"long3 __ovld __cnfn convert_long3_rtp(char3);\n"
28840"long3 __ovld __cnfn convert_long3_sat_rtp(char3);\n"
28841"long3 __ovld __cnfn convert_long3_rtn(char3);\n"
28842"long3 __ovld __cnfn convert_long3_sat_rtn(char3);\n"
28843"long3 __ovld __cnfn convert_long3(char3);\n"
28844"long3 __ovld __cnfn convert_long3_sat(char3);\n"
28845"long3 __ovld __cnfn convert_long3_rte(uchar3);\n"
28846"long3 __ovld __cnfn convert_long3_sat_rte(uchar3);\n"
28847"long3 __ovld __cnfn convert_long3_rtz(uchar3);\n"
28848"long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);\n"
28849"long3 __ovld __cnfn convert_long3_rtp(uchar3);\n"
28850"long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);\n"
28851"long3 __ovld __cnfn convert_long3_rtn(uchar3);\n"
28852"long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);\n"
28853"long3 __ovld __cnfn convert_long3(uchar3);\n"
28854"long3 __ovld __cnfn convert_long3_sat(uchar3);\n"
28855"long3 __ovld __cnfn convert_long3_rte(short3);\n"
28856"long3 __ovld __cnfn convert_long3_sat_rte(short3);\n"
28857"long3 __ovld __cnfn convert_long3_rtz(short3);\n"
28858"long3 __ovld __cnfn convert_long3_sat_rtz(short3);\n"
28859"long3 __ovld __cnfn convert_long3_rtp(short3);\n"
28860"long3 __ovld __cnfn convert_long3_sat_rtp(short3);\n"
28861"long3 __ovld __cnfn convert_long3_rtn(short3);\n"
28862"long3 __ovld __cnfn convert_long3_sat_rtn(short3);\n"
28863"long3 __ovld __cnfn convert_long3(short3);\n"
28864"long3 __ovld __cnfn convert_long3_sat(short3);\n"
28865"long3 __ovld __cnfn convert_long3_rte(ushort3);\n"
28866"long3 __ovld __cnfn convert_long3_sat_rte(ushort3);\n"
28867"long3 __ovld __cnfn convert_long3_rtz(ushort3);\n"
28868"long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);\n"
28869"long3 __ovld __cnfn convert_long3_rtp(ushort3);\n"
28870"long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);\n"
28871"long3 __ovld __cnfn convert_long3_rtn(ushort3);\n"
28872"long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);\n"
28873"long3 __ovld __cnfn convert_long3(ushort3);\n"
28874"long3 __ovld __cnfn convert_long3_sat(ushort3);\n"
28875"long3 __ovld __cnfn convert_long3_rte(int3);\n"
28876"long3 __ovld __cnfn convert_long3_sat_rte(int3);\n"
28877"long3 __ovld __cnfn convert_long3_rtz(int3);\n"
28878"long3 __ovld __cnfn convert_long3_sat_rtz(int3);\n"
28879"long3 __ovld __cnfn convert_long3_rtp(int3);\n"
28880"long3 __ovld __cnfn convert_long3_sat_rtp(int3);\n"
28881"long3 __ovld __cnfn convert_long3_rtn(int3);\n"
28882"long3 __ovld __cnfn convert_long3_sat_rtn(int3);\n"
28883"long3 __ovld __cnfn convert_long3(int3);\n"
28884"long3 __ovld __cnfn convert_long3_sat(int3);\n"
28885"long3 __ovld __cnfn convert_long3_rte(uint3);\n"
28886"long3 __ovld __cnfn convert_long3_sat_rte(uint3);\n"
28887"long3 __ovld __cnfn convert_long3_rtz(uint3);\n"
28888"long3 __ovld __cnfn convert_long3_sat_rtz(uint3);\n"
28889"long3 __ovld __cnfn convert_long3_rtp(uint3);\n"
28890"long3 __ovld __cnfn convert_long3_sat_rtp(uint3);\n"
28891"long3 __ovld __cnfn convert_long3_rtn(uint3);\n"
28892"long3 __ovld __cnfn convert_long3_sat_rtn(uint3);\n"
28893"long3 __ovld __cnfn convert_long3(uint3);\n"
28894"long3 __ovld __cnfn convert_long3_sat(uint3);\n"
28895"long3 __ovld __cnfn convert_long3_rte(long3);\n"
28896"long3 __ovld __cnfn convert_long3_sat_rte(long3);\n"
28897"long3 __ovld __cnfn convert_long3_rtz(long3);\n"
28898"long3 __ovld __cnfn convert_long3_sat_rtz(long3);\n"
28899"long3 __ovld __cnfn convert_long3_rtp(long3);\n"
28900"long3 __ovld __cnfn convert_long3_sat_rtp(long3);\n"
28901"long3 __ovld __cnfn convert_long3_rtn(long3);\n"
28902"long3 __ovld __cnfn convert_long3_sat_rtn(long3);\n"
28903"long3 __ovld __cnfn convert_long3(long3);\n"
28904"long3 __ovld __cnfn convert_long3_sat(long3);\n"
28905"long3 __ovld __cnfn convert_long3_rte(ulong3);\n"
28906"long3 __ovld __cnfn convert_long3_sat_rte(ulong3);\n"
28907"long3 __ovld __cnfn convert_long3_rtz(ulong3);\n"
28908"long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);\n"
28909"long3 __ovld __cnfn convert_long3_rtp(ulong3);\n"
28910"long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);\n"
28911"long3 __ovld __cnfn convert_long3_rtn(ulong3);\n"
28912"long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);\n"
28913"long3 __ovld __cnfn convert_long3(ulong3);\n"
28914"long3 __ovld __cnfn convert_long3_sat(ulong3);\n"
28915"long3 __ovld __cnfn convert_long3_rte(float3);\n"
28916"long3 __ovld __cnfn convert_long3_sat_rte(float3);\n"
28917"long3 __ovld __cnfn convert_long3_rtz(float3);\n"
28918"long3 __ovld __cnfn convert_long3_sat_rtz(float3);\n"
28919"long3 __ovld __cnfn convert_long3_rtp(float3);\n"
28920"long3 __ovld __cnfn convert_long3_sat_rtp(float3);\n"
28921"long3 __ovld __cnfn convert_long3_rtn(float3);\n"
28922"long3 __ovld __cnfn convert_long3_sat_rtn(float3);\n"
28923"long3 __ovld __cnfn convert_long3(float3);\n"
28924"long3 __ovld __cnfn convert_long3_sat(float3);\n"
28925"ulong3 __ovld __cnfn convert_ulong3_rte(char3);\n"
28926"ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);\n"
28927"ulong3 __ovld __cnfn convert_ulong3_rtz(char3);\n"
28928"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);\n"
28929"ulong3 __ovld __cnfn convert_ulong3_rtp(char3);\n"
28930"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);\n"
28931"ulong3 __ovld __cnfn convert_ulong3_rtn(char3);\n"
28932"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);\n"
28933"ulong3 __ovld __cnfn convert_ulong3(char3);\n"
28934"ulong3 __ovld __cnfn convert_ulong3_sat(char3);\n"
28935"ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);\n"
28936"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);\n"
28937"ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);\n"
28938"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);\n"
28939"ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);\n"
28940"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);\n"
28941"ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);\n"
28942"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);\n"
28943"ulong3 __ovld __cnfn convert_ulong3(uchar3);\n"
28944"ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);\n"
28945"ulong3 __ovld __cnfn convert_ulong3_rte(short3);\n"
28946"ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);\n"
28947"ulong3 __ovld __cnfn convert_ulong3_rtz(short3);\n"
28948"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);\n"
28949"ulong3 __ovld __cnfn convert_ulong3_rtp(short3);\n"
28950"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);\n"
28951"ulong3 __ovld __cnfn convert_ulong3_rtn(short3);\n"
28952"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);\n"
28953"ulong3 __ovld __cnfn convert_ulong3(short3);\n"
28954"ulong3 __ovld __cnfn convert_ulong3_sat(short3);\n"
28955"ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);\n"
28956"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);\n"
28957"ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);\n"
28958"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);\n"
28959"ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);\n"
28960"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);\n"
28961"ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);\n"
28962"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);\n"
28963"ulong3 __ovld __cnfn convert_ulong3(ushort3);\n"
28964"ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);\n"
28965"ulong3 __ovld __cnfn convert_ulong3_rte(int3);\n"
28966"ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);\n"
28967"ulong3 __ovld __cnfn convert_ulong3_rtz(int3);\n"
28968"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);\n"
28969"ulong3 __ovld __cnfn convert_ulong3_rtp(int3);\n"
28970"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);\n"
28971"ulong3 __ovld __cnfn convert_ulong3_rtn(int3);\n"
28972"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);\n"
28973"ulong3 __ovld __cnfn convert_ulong3(int3);\n"
28974"ulong3 __ovld __cnfn convert_ulong3_sat(int3);\n"
28975"ulong3 __ovld __cnfn convert_ulong3_rte(uint3);\n"
28976"ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);\n"
28977"ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);\n"
28978"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);\n"
28979"ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);\n"
28980"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);\n"
28981"ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);\n"
28982"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);\n"
28983"ulong3 __ovld __cnfn convert_ulong3(uint3);\n"
28984"ulong3 __ovld __cnfn convert_ulong3_sat(uint3);\n"
28985"ulong3 __ovld __cnfn convert_ulong3_rte(long3);\n"
28986"ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);\n"
28987"ulong3 __ovld __cnfn convert_ulong3_rtz(long3);\n"
28988"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);\n"
28989"ulong3 __ovld __cnfn convert_ulong3_rtp(long3);\n"
28990"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);\n"
28991"ulong3 __ovld __cnfn convert_ulong3_rtn(long3);\n"
28992"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);\n"
28993"ulong3 __ovld __cnfn convert_ulong3(long3);\n"
28994"ulong3 __ovld __cnfn convert_ulong3_sat(long3);\n"
28995"ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);\n"
28996"ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);\n"
28997"ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);\n"
28998"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);\n"
28999"ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);\n"
29000"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);\n"
29001"ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);\n"
29002"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);\n"
29003"ulong3 __ovld __cnfn convert_ulong3(ulong3);\n"
29004"ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);\n"
29005"ulong3 __ovld __cnfn convert_ulong3_rte(float3);\n"
29006"ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);\n"
29007"ulong3 __ovld __cnfn convert_ulong3_rtz(float3);\n"
29008"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);\n"
29009"ulong3 __ovld __cnfn convert_ulong3_rtp(float3);\n"
29010"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);\n"
29011"ulong3 __ovld __cnfn convert_ulong3_rtn(float3);\n"
29012"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);\n"
29013"ulong3 __ovld __cnfn convert_ulong3(float3);\n"
29014"ulong3 __ovld __cnfn convert_ulong3_sat(float3);\n"
29015"float3 __ovld __cnfn convert_float3_rte(char3);\n"
29016"float3 __ovld __cnfn convert_float3_rtz(char3);\n"
29017"float3 __ovld __cnfn convert_float3_rtp(char3);\n"
29018"float3 __ovld __cnfn convert_float3_rtn(char3);\n"
29019"float3 __ovld __cnfn convert_float3(char3);\n"
29020"float3 __ovld __cnfn convert_float3_rte(uchar3);\n"
29021"float3 __ovld __cnfn convert_float3_rtz(uchar3);\n"
29022"float3 __ovld __cnfn convert_float3_rtp(uchar3);\n"
29023"float3 __ovld __cnfn convert_float3_rtn(uchar3);\n"
29024"float3 __ovld __cnfn convert_float3(uchar3);\n"
29025"float3 __ovld __cnfn convert_float3_rte(short3);\n"
29026"float3 __ovld __cnfn convert_float3_rtz(short3);\n"
29027"float3 __ovld __cnfn convert_float3_rtp(short3);\n"
29028"float3 __ovld __cnfn convert_float3_rtn(short3);\n"
29029"float3 __ovld __cnfn convert_float3(short3);\n"
29030"float3 __ovld __cnfn convert_float3_rte(ushort3);\n"
29031"float3 __ovld __cnfn convert_float3_rtz(ushort3);\n"
29032"float3 __ovld __cnfn convert_float3_rtp(ushort3);\n"
29033"float3 __ovld __cnfn convert_float3_rtn(ushort3);\n"
29034"float3 __ovld __cnfn convert_float3(ushort3);\n"
29035"float3 __ovld __cnfn convert_float3_rte(int3);\n"
29036"float3 __ovld __cnfn convert_float3_rtz(int3);\n"
29037"float3 __ovld __cnfn convert_float3_rtp(int3);\n"
29038"float3 __ovld __cnfn convert_float3_rtn(int3);\n"
29039"float3 __ovld __cnfn convert_float3(int3);\n"
29040"float3 __ovld __cnfn convert_float3_rte(uint3);\n"
29041"float3 __ovld __cnfn convert_float3_rtz(uint3);\n"
29042"float3 __ovld __cnfn convert_float3_rtp(uint3);\n"
29043"float3 __ovld __cnfn convert_float3_rtn(uint3);\n"
29044"float3 __ovld __cnfn convert_float3(uint3);\n"
29045"float3 __ovld __cnfn convert_float3_rte(long3);\n"
29046"float3 __ovld __cnfn convert_float3_rtz(long3);\n"
29047"float3 __ovld __cnfn convert_float3_rtp(long3);\n"
29048"float3 __ovld __cnfn convert_float3_rtn(long3);\n"
29049"float3 __ovld __cnfn convert_float3(long3);\n"
29050"float3 __ovld __cnfn convert_float3_rte(ulong3);\n"
29051"float3 __ovld __cnfn convert_float3_rtz(ulong3);\n"
29052"float3 __ovld __cnfn convert_float3_rtp(ulong3);\n"
29053"float3 __ovld __cnfn convert_float3_rtn(ulong3);\n"
29054"float3 __ovld __cnfn convert_float3(ulong3);\n"
29055"float3 __ovld __cnfn convert_float3_rte(float3);\n"
29056"float3 __ovld __cnfn convert_float3_rtz(float3);\n"
29057"float3 __ovld __cnfn convert_float3_rtp(float3);\n"
29058"float3 __ovld __cnfn convert_float3_rtn(float3);\n"
29059"float3 __ovld __cnfn convert_float3(float3);\n"
29060"char4 __ovld __cnfn convert_char4_rte(char4);\n"
29061"char4 __ovld __cnfn convert_char4_sat_rte(char4);\n"
29062"char4 __ovld __cnfn convert_char4_rtz(char4);\n"
29063"char4 __ovld __cnfn convert_char4_sat_rtz(char4);\n"
29064"char4 __ovld __cnfn convert_char4_rtp(char4);\n"
29065"char4 __ovld __cnfn convert_char4_sat_rtp(char4);\n"
29066"char4 __ovld __cnfn convert_char4_rtn(char4);\n"
29067"char4 __ovld __cnfn convert_char4_sat_rtn(char4);\n"
29068"char4 __ovld __cnfn convert_char4(char4);\n"
29069"char4 __ovld __cnfn convert_char4_sat(char4);\n"
29070"char4 __ovld __cnfn convert_char4_rte(uchar4);\n"
29071"char4 __ovld __cnfn convert_char4_sat_rte(uchar4);\n"
29072"char4 __ovld __cnfn convert_char4_rtz(uchar4);\n"
29073"char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);\n"
29074"char4 __ovld __cnfn convert_char4_rtp(uchar4);\n"
29075"char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);\n"
29076"char4 __ovld __cnfn convert_char4_rtn(uchar4);\n"
29077"char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);\n"
29078"char4 __ovld __cnfn convert_char4(uchar4);\n"
29079"char4 __ovld __cnfn convert_char4_sat(uchar4);\n"
29080"char4 __ovld __cnfn convert_char4_rte(short4);\n"
29081"char4 __ovld __cnfn convert_char4_sat_rte(short4);\n"
29082"char4 __ovld __cnfn convert_char4_rtz(short4);\n"
29083"char4 __ovld __cnfn convert_char4_sat_rtz(short4);\n"
29084"char4 __ovld __cnfn convert_char4_rtp(short4);\n"
29085"char4 __ovld __cnfn convert_char4_sat_rtp(short4);\n"
29086"char4 __ovld __cnfn convert_char4_rtn(short4);\n"
29087"char4 __ovld __cnfn convert_char4_sat_rtn(short4);\n"
29088"char4 __ovld __cnfn convert_char4(short4);\n"
29089"char4 __ovld __cnfn convert_char4_sat(short4);\n"
29090"char4 __ovld __cnfn convert_char4_rte(ushort4);\n"
29091"char4 __ovld __cnfn convert_char4_sat_rte(ushort4);\n"
29092"char4 __ovld __cnfn convert_char4_rtz(ushort4);\n"
29093"char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);\n"
29094"char4 __ovld __cnfn convert_char4_rtp(ushort4);\n"
29095"char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);\n"
29096"char4 __ovld __cnfn convert_char4_rtn(ushort4);\n"
29097"char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);\n"
29098"char4 __ovld __cnfn convert_char4(ushort4);\n"
29099"char4 __ovld __cnfn convert_char4_sat(ushort4);\n"
29100"char4 __ovld __cnfn convert_char4_rte(int4);\n"
29101"char4 __ovld __cnfn convert_char4_sat_rte(int4);\n"
29102"char4 __ovld __cnfn convert_char4_rtz(int4);\n"
29103"char4 __ovld __cnfn convert_char4_sat_rtz(int4);\n"
29104"char4 __ovld __cnfn convert_char4_rtp(int4);\n"
29105"char4 __ovld __cnfn convert_char4_sat_rtp(int4);\n"
29106"char4 __ovld __cnfn convert_char4_rtn(int4);\n"
29107"char4 __ovld __cnfn convert_char4_sat_rtn(int4);\n"
29108"char4 __ovld __cnfn convert_char4(int4);\n"
29109"char4 __ovld __cnfn convert_char4_sat(int4);\n"
29110"char4 __ovld __cnfn convert_char4_rte(uint4);\n"
29111"char4 __ovld __cnfn convert_char4_sat_rte(uint4);\n"
29112"char4 __ovld __cnfn convert_char4_rtz(uint4);\n"
29113"char4 __ovld __cnfn convert_char4_sat_rtz(uint4);\n"
29114"char4 __ovld __cnfn convert_char4_rtp(uint4);\n"
29115"char4 __ovld __cnfn convert_char4_sat_rtp(uint4);\n"
29116"char4 __ovld __cnfn convert_char4_rtn(uint4);\n"
29117"char4 __ovld __cnfn convert_char4_sat_rtn(uint4);\n"
29118"char4 __ovld __cnfn convert_char4(uint4);\n"
29119"char4 __ovld __cnfn convert_char4_sat(uint4);\n"
29120"char4 __ovld __cnfn convert_char4_rte(long4);\n"
29121"char4 __ovld __cnfn convert_char4_sat_rte(long4);\n"
29122"char4 __ovld __cnfn convert_char4_rtz(long4);\n"
29123"char4 __ovld __cnfn convert_char4_sat_rtz(long4);\n"
29124"char4 __ovld __cnfn convert_char4_rtp(long4);\n"
29125"char4 __ovld __cnfn convert_char4_sat_rtp(long4);\n"
29126"char4 __ovld __cnfn convert_char4_rtn(long4);\n"
29127"char4 __ovld __cnfn convert_char4_sat_rtn(long4);\n"
29128"char4 __ovld __cnfn convert_char4(long4);\n"
29129"char4 __ovld __cnfn convert_char4_sat(long4);\n"
29130"char4 __ovld __cnfn convert_char4_rte(ulong4);\n"
29131"char4 __ovld __cnfn convert_char4_sat_rte(ulong4);\n"
29132"char4 __ovld __cnfn convert_char4_rtz(ulong4);\n"
29133"char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);\n"
29134"char4 __ovld __cnfn convert_char4_rtp(ulong4);\n"
29135"char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);\n"
29136"char4 __ovld __cnfn convert_char4_rtn(ulong4);\n"
29137"char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);\n"
29138"char4 __ovld __cnfn convert_char4(ulong4);\n"
29139"char4 __ovld __cnfn convert_char4_sat(ulong4);\n"
29140"char4 __ovld __cnfn convert_char4_rte(float4);\n"
29141"char4 __ovld __cnfn convert_char4_sat_rte(float4);\n"
29142"char4 __ovld __cnfn convert_char4_rtz(float4);\n"
29143"char4 __ovld __cnfn convert_char4_sat_rtz(float4);\n"
29144"char4 __ovld __cnfn convert_char4_rtp(float4);\n"
29145"char4 __ovld __cnfn convert_char4_sat_rtp(float4);\n"
29146"char4 __ovld __cnfn convert_char4_rtn(float4);\n"
29147"char4 __ovld __cnfn convert_char4_sat_rtn(float4);\n"
29148"char4 __ovld __cnfn convert_char4(float4);\n"
29149"char4 __ovld __cnfn convert_char4_sat(float4);\n"
29150"uchar4 __ovld __cnfn convert_uchar4_rte(char4);\n"
29151"uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);\n"
29152"uchar4 __ovld __cnfn convert_uchar4_rtz(char4);\n"
29153"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);\n"
29154"uchar4 __ovld __cnfn convert_uchar4_rtp(char4);\n"
29155"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);\n"
29156"uchar4 __ovld __cnfn convert_uchar4_rtn(char4);\n"
29157"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);\n"
29158"uchar4 __ovld __cnfn convert_uchar4(char4);\n"
29159"uchar4 __ovld __cnfn convert_uchar4_sat(char4);\n"
29160"uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);\n"
29161"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);\n"
29162"uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);\n"
29163"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);\n"
29164"uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);\n"
29165"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);\n"
29166"uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);\n"
29167"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);\n"
29168"uchar4 __ovld __cnfn convert_uchar4(uchar4);\n"
29169"uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);\n"
29170"uchar4 __ovld __cnfn convert_uchar4_rte(short4);\n"
29171"uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);\n"
29172"uchar4 __ovld __cnfn convert_uchar4_rtz(short4);\n"
29173"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);\n"
29174"uchar4 __ovld __cnfn convert_uchar4_rtp(short4);\n"
29175"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);\n"
29176"uchar4 __ovld __cnfn convert_uchar4_rtn(short4);\n"
29177"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);\n"
29178"uchar4 __ovld __cnfn convert_uchar4(short4);\n"
29179"uchar4 __ovld __cnfn convert_uchar4_sat(short4);\n"
29180"uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);\n"
29181"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);\n"
29182"uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);\n"
29183"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);\n"
29184"uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);\n"
29185"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);\n"
29186"uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);\n"
29187"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);\n"
29188"uchar4 __ovld __cnfn convert_uchar4(ushort4);\n"
29189"uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);\n"
29190"uchar4 __ovld __cnfn convert_uchar4_rte(int4);\n"
29191"uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);\n"
29192"uchar4 __ovld __cnfn convert_uchar4_rtz(int4);\n"
29193"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);\n"
29194"uchar4 __ovld __cnfn convert_uchar4_rtp(int4);\n"
29195"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);\n"
29196"uchar4 __ovld __cnfn convert_uchar4_rtn(int4);\n"
29197"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);\n"
29198"uchar4 __ovld __cnfn convert_uchar4(int4);\n"
29199"uchar4 __ovld __cnfn convert_uchar4_sat(int4);\n"
29200"uchar4 __ovld __cnfn convert_uchar4_rte(uint4);\n"
29201"uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);\n"
29202"uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);\n"
29203"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);\n"
29204"uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);\n"
29205"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);\n"
29206"uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);\n"
29207"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);\n"
29208"uchar4 __ovld __cnfn convert_uchar4(uint4);\n"
29209"uchar4 __ovld __cnfn convert_uchar4_sat(uint4);\n"
29210"uchar4 __ovld __cnfn convert_uchar4_rte(long4);\n"
29211"uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);\n"
29212"uchar4 __ovld __cnfn convert_uchar4_rtz(long4);\n"
29213"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);\n"
29214"uchar4 __ovld __cnfn convert_uchar4_rtp(long4);\n"
29215"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);\n"
29216"uchar4 __ovld __cnfn convert_uchar4_rtn(long4);\n"
29217"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);\n"
29218"uchar4 __ovld __cnfn convert_uchar4(long4);\n"
29219"uchar4 __ovld __cnfn convert_uchar4_sat(long4);\n"
29220"uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);\n"
29221"uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);\n"
29222"uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);\n"
29223"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);\n"
29224"uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);\n"
29225"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);\n"
29226"uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);\n"
29227"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);\n"
29228"uchar4 __ovld __cnfn convert_uchar4(ulong4);\n"
29229"uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);\n"
29230"uchar4 __ovld __cnfn convert_uchar4_rte(float4);\n"
29231"uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);\n"
29232"uchar4 __ovld __cnfn convert_uchar4_rtz(float4);\n"
29233"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);\n"
29234"uchar4 __ovld __cnfn convert_uchar4_rtp(float4);\n"
29235"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);\n"
29236"uchar4 __ovld __cnfn convert_uchar4_rtn(float4);\n"
29237"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);\n"
29238"uchar4 __ovld __cnfn convert_uchar4(float4);\n"
29239"uchar4 __ovld __cnfn convert_uchar4_sat(float4);\n"
29240"short4 __ovld __cnfn convert_short4_rte(char4);\n"
29241"short4 __ovld __cnfn convert_short4_sat_rte(char4);\n"
29242"short4 __ovld __cnfn convert_short4_rtz(char4);\n"
29243"short4 __ovld __cnfn convert_short4_sat_rtz(char4);\n"
29244"short4 __ovld __cnfn convert_short4_rtp(char4);\n"
29245"short4 __ovld __cnfn convert_short4_sat_rtp(char4);\n"
29246"short4 __ovld __cnfn convert_short4_rtn(char4);\n"
29247"short4 __ovld __cnfn convert_short4_sat_rtn(char4);\n"
29248"short4 __ovld __cnfn convert_short4(char4);\n"
29249"short4 __ovld __cnfn convert_short4_sat(char4);\n"
29250"short4 __ovld __cnfn convert_short4_rte(uchar4);\n"
29251"short4 __ovld __cnfn convert_short4_sat_rte(uchar4);\n"
29252"short4 __ovld __cnfn convert_short4_rtz(uchar4);\n"
29253"short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);\n"
29254"short4 __ovld __cnfn convert_short4_rtp(uchar4);\n"
29255"short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);\n"
29256"short4 __ovld __cnfn convert_short4_rtn(uchar4);\n"
29257"short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);\n"
29258"short4 __ovld __cnfn convert_short4(uchar4);\n"
29259"short4 __ovld __cnfn convert_short4_sat(uchar4);\n"
29260"short4 __ovld __cnfn convert_short4_rte(short4);\n"
29261"short4 __ovld __cnfn convert_short4_sat_rte(short4);\n"
29262"short4 __ovld __cnfn convert_short4_rtz(short4);\n"
29263"short4 __ovld __cnfn convert_short4_sat_rtz(short4);\n"
29264"short4 __ovld __cnfn convert_short4_rtp(short4);\n"
29265"short4 __ovld __cnfn convert_short4_sat_rtp(short4);\n"
29266"short4 __ovld __cnfn convert_short4_rtn(short4);\n"
29267"short4 __ovld __cnfn convert_short4_sat_rtn(short4);\n"
29268"short4 __ovld __cnfn convert_short4(short4);\n"
29269"short4 __ovld __cnfn convert_short4_sat(short4);\n"
29270"short4 __ovld __cnfn convert_short4_rte(ushort4);\n"
29271"short4 __ovld __cnfn convert_short4_sat_rte(ushort4);\n"
29272"short4 __ovld __cnfn convert_short4_rtz(ushort4);\n"
29273"short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);\n"
29274"short4 __ovld __cnfn convert_short4_rtp(ushort4);\n"
29275"short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);\n"
29276"short4 __ovld __cnfn convert_short4_rtn(ushort4);\n"
29277"short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);\n"
29278"short4 __ovld __cnfn convert_short4(ushort4);\n"
29279"short4 __ovld __cnfn convert_short4_sat(ushort4);\n"
29280"short4 __ovld __cnfn convert_short4_rte(int4);\n"
29281"short4 __ovld __cnfn convert_short4_sat_rte(int4);\n"
29282"short4 __ovld __cnfn convert_short4_rtz(int4);\n"
29283"short4 __ovld __cnfn convert_short4_sat_rtz(int4);\n"
29284"short4 __ovld __cnfn convert_short4_rtp(int4);\n"
29285"short4 __ovld __cnfn convert_short4_sat_rtp(int4);\n"
29286"short4 __ovld __cnfn convert_short4_rtn(int4);\n"
29287"short4 __ovld __cnfn convert_short4_sat_rtn(int4);\n"
29288"short4 __ovld __cnfn convert_short4(int4);\n"
29289"short4 __ovld __cnfn convert_short4_sat(int4);\n"
29290"short4 __ovld __cnfn convert_short4_rte(uint4);\n"
29291"short4 __ovld __cnfn convert_short4_sat_rte(uint4);\n"
29292"short4 __ovld __cnfn convert_short4_rtz(uint4);\n"
29293"short4 __ovld __cnfn convert_short4_sat_rtz(uint4);\n"
29294"short4 __ovld __cnfn convert_short4_rtp(uint4);\n"
29295"short4 __ovld __cnfn convert_short4_sat_rtp(uint4);\n"
29296"short4 __ovld __cnfn convert_short4_rtn(uint4);\n"
29297"short4 __ovld __cnfn convert_short4_sat_rtn(uint4);\n"
29298"short4 __ovld __cnfn convert_short4(uint4);\n"
29299"short4 __ovld __cnfn convert_short4_sat(uint4);\n"
29300"short4 __ovld __cnfn convert_short4_rte(long4);\n"
29301"short4 __ovld __cnfn convert_short4_sat_rte(long4);\n"
29302"short4 __ovld __cnfn convert_short4_rtz(long4);\n"
29303"short4 __ovld __cnfn convert_short4_sat_rtz(long4);\n"
29304"short4 __ovld __cnfn convert_short4_rtp(long4);\n"
29305"short4 __ovld __cnfn convert_short4_sat_rtp(long4);\n"
29306"short4 __ovld __cnfn convert_short4_rtn(long4);\n"
29307"short4 __ovld __cnfn convert_short4_sat_rtn(long4);\n"
29308"short4 __ovld __cnfn convert_short4(long4);\n"
29309"short4 __ovld __cnfn convert_short4_sat(long4);\n"
29310"short4 __ovld __cnfn convert_short4_rte(ulong4);\n"
29311"short4 __ovld __cnfn convert_short4_sat_rte(ulong4);\n"
29312"short4 __ovld __cnfn convert_short4_rtz(ulong4);\n"
29313"short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);\n"
29314"short4 __ovld __cnfn convert_short4_rtp(ulong4);\n"
29315"short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);\n"
29316"short4 __ovld __cnfn convert_short4_rtn(ulong4);\n"
29317"short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);\n"
29318"short4 __ovld __cnfn convert_short4(ulong4);\n"
29319"short4 __ovld __cnfn convert_short4_sat(ulong4);\n"
29320"short4 __ovld __cnfn convert_short4_rte(float4);\n"
29321"short4 __ovld __cnfn convert_short4_sat_rte(float4);\n"
29322"short4 __ovld __cnfn convert_short4_rtz(float4);\n"
29323"short4 __ovld __cnfn convert_short4_sat_rtz(float4);\n"
29324"short4 __ovld __cnfn convert_short4_rtp(float4);\n"
29325"short4 __ovld __cnfn convert_short4_sat_rtp(float4);\n"
29326"short4 __ovld __cnfn convert_short4_rtn(float4);\n"
29327"short4 __ovld __cnfn convert_short4_sat_rtn(float4);\n"
29328"short4 __ovld __cnfn convert_short4(float4);\n"
29329"short4 __ovld __cnfn convert_short4_sat(float4);\n"
29330"ushort4 __ovld __cnfn convert_ushort4_rte(char4);\n"
29331"ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);\n"
29332"ushort4 __ovld __cnfn convert_ushort4_rtz(char4);\n"
29333"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);\n"
29334"ushort4 __ovld __cnfn convert_ushort4_rtp(char4);\n"
29335"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);\n"
29336"ushort4 __ovld __cnfn convert_ushort4_rtn(char4);\n"
29337"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);\n"
29338"ushort4 __ovld __cnfn convert_ushort4(char4);\n"
29339"ushort4 __ovld __cnfn convert_ushort4_sat(char4);\n"
29340"ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);\n"
29341"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);\n"
29342"ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);\n"
29343"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);\n"
29344"ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);\n"
29345"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);\n"
29346"ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);\n"
29347"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);\n"
29348"ushort4 __ovld __cnfn convert_ushort4(uchar4);\n"
29349"ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);\n"
29350"ushort4 __ovld __cnfn convert_ushort4_rte(short4);\n"
29351"ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);\n"
29352"ushort4 __ovld __cnfn convert_ushort4_rtz(short4);\n"
29353"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);\n"
29354"ushort4 __ovld __cnfn convert_ushort4_rtp(short4);\n"
29355"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);\n"
29356"ushort4 __ovld __cnfn convert_ushort4_rtn(short4);\n"
29357"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);\n"
29358"ushort4 __ovld __cnfn convert_ushort4(short4);\n"
29359"ushort4 __ovld __cnfn convert_ushort4_sat(short4);\n"
29360"ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);\n"
29361"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);\n"
29362"ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);\n"
29363"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);\n"
29364"ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);\n"
29365"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);\n"
29366"ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);\n"
29367"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);\n"
29368"ushort4 __ovld __cnfn convert_ushort4(ushort4);\n"
29369"ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);\n"
29370"ushort4 __ovld __cnfn convert_ushort4_rte(int4);\n"
29371"ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);\n"
29372"ushort4 __ovld __cnfn convert_ushort4_rtz(int4);\n"
29373"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);\n"
29374"ushort4 __ovld __cnfn convert_ushort4_rtp(int4);\n"
29375"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);\n"
29376"ushort4 __ovld __cnfn convert_ushort4_rtn(int4);\n"
29377"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);\n"
29378"ushort4 __ovld __cnfn convert_ushort4(int4);\n"
29379"ushort4 __ovld __cnfn convert_ushort4_sat(int4);\n"
29380"ushort4 __ovld __cnfn convert_ushort4_rte(uint4);\n"
29381"ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);\n"
29382"ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);\n"
29383"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);\n"
29384"ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);\n"
29385"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);\n"
29386"ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);\n"
29387"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);\n"
29388"ushort4 __ovld __cnfn convert_ushort4(uint4);\n"
29389"ushort4 __ovld __cnfn convert_ushort4_sat(uint4);\n"
29390"ushort4 __ovld __cnfn convert_ushort4_rte(long4);\n"
29391"ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);\n"
29392"ushort4 __ovld __cnfn convert_ushort4_rtz(long4);\n"
29393"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);\n"
29394"ushort4 __ovld __cnfn convert_ushort4_rtp(long4);\n"
29395"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);\n"
29396"ushort4 __ovld __cnfn convert_ushort4_rtn(long4);\n"
29397"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);\n"
29398"ushort4 __ovld __cnfn convert_ushort4(long4);\n"
29399"ushort4 __ovld __cnfn convert_ushort4_sat(long4);\n"
29400"ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);\n"
29401"ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);\n"
29402"ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);\n"
29403"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);\n"
29404"ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);\n"
29405"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);\n"
29406"ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);\n"
29407"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);\n"
29408"ushort4 __ovld __cnfn convert_ushort4(ulong4);\n"
29409"ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);\n"
29410"ushort4 __ovld __cnfn convert_ushort4_rte(float4);\n"
29411"ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);\n"
29412"ushort4 __ovld __cnfn convert_ushort4_rtz(float4);\n"
29413"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);\n"
29414"ushort4 __ovld __cnfn convert_ushort4_rtp(float4);\n"
29415"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);\n"
29416"ushort4 __ovld __cnfn convert_ushort4_rtn(float4);\n"
29417"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);\n"
29418"ushort4 __ovld __cnfn convert_ushort4(float4);\n"
29419"ushort4 __ovld __cnfn convert_ushort4_sat(float4);\n"
29420"int4 __ovld __cnfn convert_int4_rte(char4);\n"
29421"int4 __ovld __cnfn convert_int4_sat_rte(char4);\n"
29422"int4 __ovld __cnfn convert_int4_rtz(char4);\n"
29423"int4 __ovld __cnfn convert_int4_sat_rtz(char4);\n"
29424"int4 __ovld __cnfn convert_int4_rtp(char4);\n"
29425"int4 __ovld __cnfn convert_int4_sat_rtp(char4);\n"
29426"int4 __ovld __cnfn convert_int4_rtn(char4);\n"
29427"int4 __ovld __cnfn convert_int4_sat_rtn(char4);\n"
29428"int4 __ovld __cnfn convert_int4(char4);\n"
29429"int4 __ovld __cnfn convert_int4_sat(char4);\n"
29430"int4 __ovld __cnfn convert_int4_rte(uchar4);\n"
29431"int4 __ovld __cnfn convert_int4_sat_rte(uchar4);\n"
29432"int4 __ovld __cnfn convert_int4_rtz(uchar4);\n"
29433"int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);\n"
29434"int4 __ovld __cnfn convert_int4_rtp(uchar4);\n"
29435"int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);\n"
29436"int4 __ovld __cnfn convert_int4_rtn(uchar4);\n"
29437"int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);\n"
29438"int4 __ovld __cnfn convert_int4(uchar4);\n"
29439"int4 __ovld __cnfn convert_int4_sat(uchar4);\n"
29440"int4 __ovld __cnfn convert_int4_rte(short4);\n"
29441"int4 __ovld __cnfn convert_int4_sat_rte(short4);\n"
29442"int4 __ovld __cnfn convert_int4_rtz(short4);\n"
29443"int4 __ovld __cnfn convert_int4_sat_rtz(short4);\n"
29444"int4 __ovld __cnfn convert_int4_rtp(short4);\n"
29445"int4 __ovld __cnfn convert_int4_sat_rtp(short4);\n"
29446"int4 __ovld __cnfn convert_int4_rtn(short4);\n"
29447"int4 __ovld __cnfn convert_int4_sat_rtn(short4);\n"
29448"int4 __ovld __cnfn convert_int4(short4);\n"
29449"int4 __ovld __cnfn convert_int4_sat(short4);\n"
29450"int4 __ovld __cnfn convert_int4_rte(ushort4);\n"
29451"int4 __ovld __cnfn convert_int4_sat_rte(ushort4);\n"
29452"int4 __ovld __cnfn convert_int4_rtz(ushort4);\n"
29453"int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);\n"
29454"int4 __ovld __cnfn convert_int4_rtp(ushort4);\n"
29455"int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);\n"
29456"int4 __ovld __cnfn convert_int4_rtn(ushort4);\n"
29457"int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);\n"
29458"int4 __ovld __cnfn convert_int4(ushort4);\n"
29459"int4 __ovld __cnfn convert_int4_sat(ushort4);\n"
29460"int4 __ovld __cnfn convert_int4_rte(int4);\n"
29461"int4 __ovld __cnfn convert_int4_sat_rte(int4);\n"
29462"int4 __ovld __cnfn convert_int4_rtz(int4);\n"
29463"int4 __ovld __cnfn convert_int4_sat_rtz(int4);\n"
29464"int4 __ovld __cnfn convert_int4_rtp(int4);\n"
29465"int4 __ovld __cnfn convert_int4_sat_rtp(int4);\n"
29466"int4 __ovld __cnfn convert_int4_rtn(int4);\n"
29467"int4 __ovld __cnfn convert_int4_sat_rtn(int4);\n"
29468"int4 __ovld __cnfn convert_int4(int4);\n"
29469"int4 __ovld __cnfn convert_int4_sat(int4);\n"
29470"int4 __ovld __cnfn convert_int4_rte(uint4);\n"
29471"int4 __ovld __cnfn convert_int4_sat_rte(uint4);\n"
29472"int4 __ovld __cnfn convert_int4_rtz(uint4);\n"
29473"int4 __ovld __cnfn convert_int4_sat_rtz(uint4);\n"
29474"int4 __ovld __cnfn convert_int4_rtp(uint4);\n"
29475"int4 __ovld __cnfn convert_int4_sat_rtp(uint4);\n"
29476"int4 __ovld __cnfn convert_int4_rtn(uint4);\n"
29477"int4 __ovld __cnfn convert_int4_sat_rtn(uint4);\n"
29478"int4 __ovld __cnfn convert_int4(uint4);\n"
29479"int4 __ovld __cnfn convert_int4_sat(uint4);\n"
29480"int4 __ovld __cnfn convert_int4_rte(long4);\n"
29481"int4 __ovld __cnfn convert_int4_sat_rte(long4);\n"
29482"int4 __ovld __cnfn convert_int4_rtz(long4);\n"
29483"int4 __ovld __cnfn convert_int4_sat_rtz(long4);\n"
29484"int4 __ovld __cnfn convert_int4_rtp(long4);\n"
29485"int4 __ovld __cnfn convert_int4_sat_rtp(long4);\n"
29486"int4 __ovld __cnfn convert_int4_rtn(long4);\n"
29487"int4 __ovld __cnfn convert_int4_sat_rtn(long4);\n"
29488"int4 __ovld __cnfn convert_int4(long4);\n"
29489"int4 __ovld __cnfn convert_int4_sat(long4);\n"
29490"int4 __ovld __cnfn convert_int4_rte(ulong4);\n"
29491"int4 __ovld __cnfn convert_int4_sat_rte(ulong4);\n"
29492"int4 __ovld __cnfn convert_int4_rtz(ulong4);\n"
29493"int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);\n"
29494"int4 __ovld __cnfn convert_int4_rtp(ulong4);\n"
29495"int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);\n"
29496"int4 __ovld __cnfn convert_int4_rtn(ulong4);\n"
29497"int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);\n"
29498"int4 __ovld __cnfn convert_int4(ulong4);\n"
29499"int4 __ovld __cnfn convert_int4_sat(ulong4);\n"
29500"int4 __ovld __cnfn convert_int4_rte(float4);\n"
29501"int4 __ovld __cnfn convert_int4_sat_rte(float4);\n"
29502"int4 __ovld __cnfn convert_int4_rtz(float4);\n"
29503"int4 __ovld __cnfn convert_int4_sat_rtz(float4);\n"
29504"int4 __ovld __cnfn convert_int4_rtp(float4);\n"
29505"int4 __ovld __cnfn convert_int4_sat_rtp(float4);\n"
29506"int4 __ovld __cnfn convert_int4_rtn(float4);\n"
29507"int4 __ovld __cnfn convert_int4_sat_rtn(float4);\n"
29508"int4 __ovld __cnfn convert_int4(float4);\n"
29509"int4 __ovld __cnfn convert_int4_sat(float4);\n"
29510"uint4 __ovld __cnfn convert_uint4_rte(char4);\n"
29511"uint4 __ovld __cnfn convert_uint4_sat_rte(char4);\n"
29512"uint4 __ovld __cnfn convert_uint4_rtz(char4);\n"
29513"uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);\n"
29514"uint4 __ovld __cnfn convert_uint4_rtp(char4);\n"
29515"uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);\n"
29516"uint4 __ovld __cnfn convert_uint4_rtn(char4);\n"
29517"uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);\n"
29518"uint4 __ovld __cnfn convert_uint4(char4);\n"
29519"uint4 __ovld __cnfn convert_uint4_sat(char4);\n"
29520"uint4 __ovld __cnfn convert_uint4_rte(uchar4);\n"
29521"uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);\n"
29522"uint4 __ovld __cnfn convert_uint4_rtz(uchar4);\n"
29523"uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);\n"
29524"uint4 __ovld __cnfn convert_uint4_rtp(uchar4);\n"
29525"uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);\n"
29526"uint4 __ovld __cnfn convert_uint4_rtn(uchar4);\n"
29527"uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);\n"
29528"uint4 __ovld __cnfn convert_uint4(uchar4);\n"
29529"uint4 __ovld __cnfn convert_uint4_sat(uchar4);\n"
29530"uint4 __ovld __cnfn convert_uint4_rte(short4);\n"
29531"uint4 __ovld __cnfn convert_uint4_sat_rte(short4);\n"
29532"uint4 __ovld __cnfn convert_uint4_rtz(short4);\n"
29533"uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);\n"
29534"uint4 __ovld __cnfn convert_uint4_rtp(short4);\n"
29535"uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);\n"
29536"uint4 __ovld __cnfn convert_uint4_rtn(short4);\n"
29537"uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);\n"
29538"uint4 __ovld __cnfn convert_uint4(short4);\n"
29539"uint4 __ovld __cnfn convert_uint4_sat(short4);\n"
29540"uint4 __ovld __cnfn convert_uint4_rte(ushort4);\n"
29541"uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);\n"
29542"uint4 __ovld __cnfn convert_uint4_rtz(ushort4);\n"
29543"uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);\n"
29544"uint4 __ovld __cnfn convert_uint4_rtp(ushort4);\n"
29545"uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);\n"
29546"uint4 __ovld __cnfn convert_uint4_rtn(ushort4);\n"
29547"uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);\n"
29548"uint4 __ovld __cnfn convert_uint4(ushort4);\n"
29549"uint4 __ovld __cnfn convert_uint4_sat(ushort4);\n"
29550"uint4 __ovld __cnfn convert_uint4_rte(int4);\n"
29551"uint4 __ovld __cnfn convert_uint4_sat_rte(int4);\n"
29552"uint4 __ovld __cnfn convert_uint4_rtz(int4);\n"
29553"uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);\n"
29554"uint4 __ovld __cnfn convert_uint4_rtp(int4);\n"
29555"uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);\n"
29556"uint4 __ovld __cnfn convert_uint4_rtn(int4);\n"
29557"uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);\n"
29558"uint4 __ovld __cnfn convert_uint4(int4);\n"
29559"uint4 __ovld __cnfn convert_uint4_sat(int4);\n"
29560"uint4 __ovld __cnfn convert_uint4_rte(uint4);\n"
29561"uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);\n"
29562"uint4 __ovld __cnfn convert_uint4_rtz(uint4);\n"
29563"uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);\n"
29564"uint4 __ovld __cnfn convert_uint4_rtp(uint4);\n"
29565"uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);\n"
29566"uint4 __ovld __cnfn convert_uint4_rtn(uint4);\n"
29567"uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);\n"
29568"uint4 __ovld __cnfn convert_uint4(uint4);\n"
29569"uint4 __ovld __cnfn convert_uint4_sat(uint4);\n"
29570"uint4 __ovld __cnfn convert_uint4_rte(long4);\n"
29571"uint4 __ovld __cnfn convert_uint4_sat_rte(long4);\n"
29572"uint4 __ovld __cnfn convert_uint4_rtz(long4);\n"
29573"uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);\n"
29574"uint4 __ovld __cnfn convert_uint4_rtp(long4);\n"
29575"uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);\n"
29576"uint4 __ovld __cnfn convert_uint4_rtn(long4);\n"
29577"uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);\n"
29578"uint4 __ovld __cnfn convert_uint4(long4);\n"
29579"uint4 __ovld __cnfn convert_uint4_sat(long4);\n"
29580"uint4 __ovld __cnfn convert_uint4_rte(ulong4);\n"
29581"uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);\n"
29582"uint4 __ovld __cnfn convert_uint4_rtz(ulong4);\n"
29583"uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);\n"
29584"uint4 __ovld __cnfn convert_uint4_rtp(ulong4);\n"
29585"uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);\n"
29586"uint4 __ovld __cnfn convert_uint4_rtn(ulong4);\n"
29587"uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);\n"
29588"uint4 __ovld __cnfn convert_uint4(ulong4);\n"
29589"uint4 __ovld __cnfn convert_uint4_sat(ulong4);\n"
29590"uint4 __ovld __cnfn convert_uint4_rte(float4);\n"
29591"uint4 __ovld __cnfn convert_uint4_sat_rte(float4);\n"
29592"uint4 __ovld __cnfn convert_uint4_rtz(float4);\n"
29593"uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);\n"
29594"uint4 __ovld __cnfn convert_uint4_rtp(float4);\n"
29595"uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);\n"
29596"uint4 __ovld __cnfn convert_uint4_rtn(float4);\n"
29597"uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);\n"
29598"uint4 __ovld __cnfn convert_uint4(float4);\n"
29599"uint4 __ovld __cnfn convert_uint4_sat(float4);\n"
29600"long4 __ovld __cnfn convert_long4_rte(char4);\n"
29601"long4 __ovld __cnfn convert_long4_sat_rte(char4);\n"
29602"long4 __ovld __cnfn convert_long4_rtz(char4);\n"
29603"long4 __ovld __cnfn convert_long4_sat_rtz(char4);\n"
29604"long4 __ovld __cnfn convert_long4_rtp(char4);\n"
29605"long4 __ovld __cnfn convert_long4_sat_rtp(char4);\n"
29606"long4 __ovld __cnfn convert_long4_rtn(char4);\n"
29607"long4 __ovld __cnfn convert_long4_sat_rtn(char4);\n"
29608"long4 __ovld __cnfn convert_long4(char4);\n"
29609"long4 __ovld __cnfn convert_long4_sat(char4);\n"
29610"long4 __ovld __cnfn convert_long4_rte(uchar4);\n"
29611"long4 __ovld __cnfn convert_long4_sat_rte(uchar4);\n"
29612"long4 __ovld __cnfn convert_long4_rtz(uchar4);\n"
29613"long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);\n"
29614"long4 __ovld __cnfn convert_long4_rtp(uchar4);\n"
29615"long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);\n"
29616"long4 __ovld __cnfn convert_long4_rtn(uchar4);\n"
29617"long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);\n"
29618"long4 __ovld __cnfn convert_long4(uchar4);\n"
29619"long4 __ovld __cnfn convert_long4_sat(uchar4);\n"
29620"long4 __ovld __cnfn convert_long4_rte(short4);\n"
29621"long4 __ovld __cnfn convert_long4_sat_rte(short4);\n"
29622"long4 __ovld __cnfn convert_long4_rtz(short4);\n"
29623"long4 __ovld __cnfn convert_long4_sat_rtz(short4);\n"
29624"long4 __ovld __cnfn convert_long4_rtp(short4);\n"
29625"long4 __ovld __cnfn convert_long4_sat_rtp(short4);\n"
29626"long4 __ovld __cnfn convert_long4_rtn(short4);\n"
29627"long4 __ovld __cnfn convert_long4_sat_rtn(short4);\n"
29628"long4 __ovld __cnfn convert_long4(short4);\n"
29629"long4 __ovld __cnfn convert_long4_sat(short4);\n"
29630"long4 __ovld __cnfn convert_long4_rte(ushort4);\n"
29631"long4 __ovld __cnfn convert_long4_sat_rte(ushort4);\n"
29632"long4 __ovld __cnfn convert_long4_rtz(ushort4);\n"
29633"long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);\n"
29634"long4 __ovld __cnfn convert_long4_rtp(ushort4);\n"
29635"long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);\n"
29636"long4 __ovld __cnfn convert_long4_rtn(ushort4);\n"
29637"long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);\n"
29638"long4 __ovld __cnfn convert_long4(ushort4);\n"
29639"long4 __ovld __cnfn convert_long4_sat(ushort4);\n"
29640"long4 __ovld __cnfn convert_long4_rte(int4);\n"
29641"long4 __ovld __cnfn convert_long4_sat_rte(int4);\n"
29642"long4 __ovld __cnfn convert_long4_rtz(int4);\n"
29643"long4 __ovld __cnfn convert_long4_sat_rtz(int4);\n"
29644"long4 __ovld __cnfn convert_long4_rtp(int4);\n"
29645"long4 __ovld __cnfn convert_long4_sat_rtp(int4);\n"
29646"long4 __ovld __cnfn convert_long4_rtn(int4);\n"
29647"long4 __ovld __cnfn convert_long4_sat_rtn(int4);\n"
29648"long4 __ovld __cnfn convert_long4(int4);\n"
29649"long4 __ovld __cnfn convert_long4_sat(int4);\n"
29650"long4 __ovld __cnfn convert_long4_rte(uint4);\n"
29651"long4 __ovld __cnfn convert_long4_sat_rte(uint4);\n"
29652"long4 __ovld __cnfn convert_long4_rtz(uint4);\n"
29653"long4 __ovld __cnfn convert_long4_sat_rtz(uint4);\n"
29654"long4 __ovld __cnfn convert_long4_rtp(uint4);\n"
29655"long4 __ovld __cnfn convert_long4_sat_rtp(uint4);\n"
29656"long4 __ovld __cnfn convert_long4_rtn(uint4);\n"
29657"long4 __ovld __cnfn convert_long4_sat_rtn(uint4);\n"
29658"long4 __ovld __cnfn convert_long4(uint4);\n"
29659"long4 __ovld __cnfn convert_long4_sat(uint4);\n"
29660"long4 __ovld __cnfn convert_long4_rte(long4);\n"
29661"long4 __ovld __cnfn convert_long4_sat_rte(long4);\n"
29662"long4 __ovld __cnfn convert_long4_rtz(long4);\n"
29663"long4 __ovld __cnfn convert_long4_sat_rtz(long4);\n"
29664"long4 __ovld __cnfn convert_long4_rtp(long4);\n"
29665"long4 __ovld __cnfn convert_long4_sat_rtp(long4);\n"
29666"long4 __ovld __cnfn convert_long4_rtn(long4);\n"
29667"long4 __ovld __cnfn convert_long4_sat_rtn(long4);\n"
29668"long4 __ovld __cnfn convert_long4(long4);\n"
29669"long4 __ovld __cnfn convert_long4_sat(long4);\n"
29670"long4 __ovld __cnfn convert_long4_rte(ulong4);\n"
29671"long4 __ovld __cnfn convert_long4_sat_rte(ulong4);\n"
29672"long4 __ovld __cnfn convert_long4_rtz(ulong4);\n"
29673"long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);\n"
29674"long4 __ovld __cnfn convert_long4_rtp(ulong4);\n"
29675"long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);\n"
29676"long4 __ovld __cnfn convert_long4_rtn(ulong4);\n"
29677"long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);\n"
29678"long4 __ovld __cnfn convert_long4(ulong4);\n"
29679"long4 __ovld __cnfn convert_long4_sat(ulong4);\n"
29680"long4 __ovld __cnfn convert_long4_rte(float4);\n"
29681"long4 __ovld __cnfn convert_long4_sat_rte(float4);\n"
29682"long4 __ovld __cnfn convert_long4_rtz(float4);\n"
29683"long4 __ovld __cnfn convert_long4_sat_rtz(float4);\n"
29684"long4 __ovld __cnfn convert_long4_rtp(float4);\n"
29685"long4 __ovld __cnfn convert_long4_sat_rtp(float4);\n"
29686"long4 __ovld __cnfn convert_long4_rtn(float4);\n"
29687"long4 __ovld __cnfn convert_long4_sat_rtn(float4);\n"
29688"long4 __ovld __cnfn convert_long4(float4);\n"
29689"long4 __ovld __cnfn convert_long4_sat(float4);\n"
29690"ulong4 __ovld __cnfn convert_ulong4_rte(char4);\n"
29691"ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);\n"
29692"ulong4 __ovld __cnfn convert_ulong4_rtz(char4);\n"
29693"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);\n"
29694"ulong4 __ovld __cnfn convert_ulong4_rtp(char4);\n"
29695"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);\n"
29696"ulong4 __ovld __cnfn convert_ulong4_rtn(char4);\n"
29697"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);\n"
29698"ulong4 __ovld __cnfn convert_ulong4(char4);\n"
29699"ulong4 __ovld __cnfn convert_ulong4_sat(char4);\n"
29700"ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);\n"
29701"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);\n"
29702"ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);\n"
29703"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);\n"
29704"ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);\n"
29705"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);\n"
29706"ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);\n"
29707"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);\n"
29708"ulong4 __ovld __cnfn convert_ulong4(uchar4);\n"
29709"ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);\n"
29710"ulong4 __ovld __cnfn convert_ulong4_rte(short4);\n"
29711"ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);\n"
29712"ulong4 __ovld __cnfn convert_ulong4_rtz(short4);\n"
29713"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);\n"
29714"ulong4 __ovld __cnfn convert_ulong4_rtp(short4);\n"
29715"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);\n"
29716"ulong4 __ovld __cnfn convert_ulong4_rtn(short4);\n"
29717"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);\n"
29718"ulong4 __ovld __cnfn convert_ulong4(short4);\n"
29719"ulong4 __ovld __cnfn convert_ulong4_sat(short4);\n"
29720"ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);\n"
29721"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);\n"
29722"ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);\n"
29723"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);\n"
29724"ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);\n"
29725"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);\n"
29726"ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);\n"
29727"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);\n"
29728"ulong4 __ovld __cnfn convert_ulong4(ushort4);\n"
29729"ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);\n"
29730"ulong4 __ovld __cnfn convert_ulong4_rte(int4);\n"
29731"ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);\n"
29732"ulong4 __ovld __cnfn convert_ulong4_rtz(int4);\n"
29733"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);\n"
29734"ulong4 __ovld __cnfn convert_ulong4_rtp(int4);\n"
29735"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);\n"
29736"ulong4 __ovld __cnfn convert_ulong4_rtn(int4);\n"
29737"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);\n"
29738"ulong4 __ovld __cnfn convert_ulong4(int4);\n"
29739"ulong4 __ovld __cnfn convert_ulong4_sat(int4);\n"
29740"ulong4 __ovld __cnfn convert_ulong4_rte(uint4);\n"
29741"ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);\n"
29742"ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);\n"
29743"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);\n"
29744"ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);\n"
29745"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);\n"
29746"ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);\n"
29747"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);\n"
29748"ulong4 __ovld __cnfn convert_ulong4(uint4);\n"
29749"ulong4 __ovld __cnfn convert_ulong4_sat(uint4);\n"
29750"ulong4 __ovld __cnfn convert_ulong4_rte(long4);\n"
29751"ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);\n"
29752"ulong4 __ovld __cnfn convert_ulong4_rtz(long4);\n"
29753"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);\n"
29754"ulong4 __ovld __cnfn convert_ulong4_rtp(long4);\n"
29755"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);\n"
29756"ulong4 __ovld __cnfn convert_ulong4_rtn(long4);\n"
29757"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);\n"
29758"ulong4 __ovld __cnfn convert_ulong4(long4);\n"
29759"ulong4 __ovld __cnfn convert_ulong4_sat(long4);\n"
29760"ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);\n"
29761"ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);\n"
29762"ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);\n"
29763"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);\n"
29764"ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);\n"
29765"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);\n"
29766"ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);\n"
29767"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);\n"
29768"ulong4 __ovld __cnfn convert_ulong4(ulong4);\n"
29769"ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);\n"
29770"ulong4 __ovld __cnfn convert_ulong4_rte(float4);\n"
29771"ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);\n"
29772"ulong4 __ovld __cnfn convert_ulong4_rtz(float4);\n"
29773"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);\n"
29774"ulong4 __ovld __cnfn convert_ulong4_rtp(float4);\n"
29775"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);\n"
29776"ulong4 __ovld __cnfn convert_ulong4_rtn(float4);\n"
29777"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);\n"
29778"ulong4 __ovld __cnfn convert_ulong4(float4);\n"
29779"ulong4 __ovld __cnfn convert_ulong4_sat(float4);\n"
29780"float4 __ovld __cnfn convert_float4_rte(char4);\n"
29781"float4 __ovld __cnfn convert_float4_rtz(char4);\n"
29782"float4 __ovld __cnfn convert_float4_rtp(char4);\n"
29783"float4 __ovld __cnfn convert_float4_rtn(char4);\n"
29784"float4 __ovld __cnfn convert_float4(char4);\n"
29785"float4 __ovld __cnfn convert_float4_rte(uchar4);\n"
29786"float4 __ovld __cnfn convert_float4_rtz(uchar4);\n"
29787"float4 __ovld __cnfn convert_float4_rtp(uchar4);\n"
29788"float4 __ovld __cnfn convert_float4_rtn(uchar4);\n"
29789"float4 __ovld __cnfn convert_float4(uchar4);\n"
29790"float4 __ovld __cnfn convert_float4_rte(short4);\n"
29791"float4 __ovld __cnfn convert_float4_rtz(short4);\n"
29792"float4 __ovld __cnfn convert_float4_rtp(short4);\n"
29793"float4 __ovld __cnfn convert_float4_rtn(short4);\n"
29794"float4 __ovld __cnfn convert_float4(short4);\n"
29795"float4 __ovld __cnfn convert_float4_rte(ushort4);\n"
29796"float4 __ovld __cnfn convert_float4_rtz(ushort4);\n"
29797"float4 __ovld __cnfn convert_float4_rtp(ushort4);\n"
29798"float4 __ovld __cnfn convert_float4_rtn(ushort4);\n"
29799"float4 __ovld __cnfn convert_float4(ushort4);\n"
29800"float4 __ovld __cnfn convert_float4_rte(int4);\n"
29801"float4 __ovld __cnfn convert_float4_rtz(int4);\n"
29802"float4 __ovld __cnfn convert_float4_rtp(int4);\n"
29803"float4 __ovld __cnfn convert_float4_rtn(int4);\n"
29804"float4 __ovld __cnfn convert_float4(int4);\n"
29805"float4 __ovld __cnfn convert_float4_rte(uint4);\n"
29806"float4 __ovld __cnfn convert_float4_rtz(uint4);\n"
29807"float4 __ovld __cnfn convert_float4_rtp(uint4);\n"
29808"float4 __ovld __cnfn convert_float4_rtn(uint4);\n"
29809"float4 __ovld __cnfn convert_float4(uint4);\n"
29810"float4 __ovld __cnfn convert_float4_rte(long4);\n"
29811"float4 __ovld __cnfn convert_float4_rtz(long4);\n"
29812"float4 __ovld __cnfn convert_float4_rtp(long4);\n"
29813"float4 __ovld __cnfn convert_float4_rtn(long4);\n"
29814"float4 __ovld __cnfn convert_float4(long4);\n"
29815"float4 __ovld __cnfn convert_float4_rte(ulong4);\n"
29816"float4 __ovld __cnfn convert_float4_rtz(ulong4);\n"
29817"float4 __ovld __cnfn convert_float4_rtp(ulong4);\n"
29818"float4 __ovld __cnfn convert_float4_rtn(ulong4);\n"
29819"float4 __ovld __cnfn convert_float4(ulong4);\n"
29820"float4 __ovld __cnfn convert_float4_rte(float4);\n"
29821"float4 __ovld __cnfn convert_float4_rtz(float4);\n"
29822"float4 __ovld __cnfn convert_float4_rtp(float4);\n"
29823"float4 __ovld __cnfn convert_float4_rtn(float4);\n"
29824"float4 __ovld __cnfn convert_float4(float4);\n"
29825"char8 __ovld __cnfn convert_char8_rte(char8);\n"
29826"char8 __ovld __cnfn convert_char8_sat_rte(char8);\n"
29827"char8 __ovld __cnfn convert_char8_rtz(char8);\n"
29828"char8 __ovld __cnfn convert_char8_sat_rtz(char8);\n"
29829"char8 __ovld __cnfn convert_char8_rtp(char8);\n"
29830"char8 __ovld __cnfn convert_char8_sat_rtp(char8);\n"
29831"char8 __ovld __cnfn convert_char8_rtn(char8);\n"
29832"char8 __ovld __cnfn convert_char8_sat_rtn(char8);\n"
29833"char8 __ovld __cnfn convert_char8(char8);\n"
29834"char8 __ovld __cnfn convert_char8_sat(char8);\n"
29835"char8 __ovld __cnfn convert_char8_rte(uchar8);\n"
29836"char8 __ovld __cnfn convert_char8_sat_rte(uchar8);\n"
29837"char8 __ovld __cnfn convert_char8_rtz(uchar8);\n"
29838"char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);\n"
29839"char8 __ovld __cnfn convert_char8_rtp(uchar8);\n"
29840"char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);\n"
29841"char8 __ovld __cnfn convert_char8_rtn(uchar8);\n"
29842"char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);\n"
29843"char8 __ovld __cnfn convert_char8(uchar8);\n"
29844"char8 __ovld __cnfn convert_char8_sat(uchar8);\n"
29845"char8 __ovld __cnfn convert_char8_rte(short8);\n"
29846"char8 __ovld __cnfn convert_char8_sat_rte(short8);\n"
29847"char8 __ovld __cnfn convert_char8_rtz(short8);\n"
29848"char8 __ovld __cnfn convert_char8_sat_rtz(short8);\n"
29849"char8 __ovld __cnfn convert_char8_rtp(short8);\n"
29850"char8 __ovld __cnfn convert_char8_sat_rtp(short8);\n"
29851"char8 __ovld __cnfn convert_char8_rtn(short8);\n"
29852"char8 __ovld __cnfn convert_char8_sat_rtn(short8);\n"
29853"char8 __ovld __cnfn convert_char8(short8);\n"
29854"char8 __ovld __cnfn convert_char8_sat(short8);\n"
29855"char8 __ovld __cnfn convert_char8_rte(ushort8);\n"
29856"char8 __ovld __cnfn convert_char8_sat_rte(ushort8);\n"
29857"char8 __ovld __cnfn convert_char8_rtz(ushort8);\n"
29858"char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);\n"
29859"char8 __ovld __cnfn convert_char8_rtp(ushort8);\n"
29860"char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);\n"
29861"char8 __ovld __cnfn convert_char8_rtn(ushort8);\n"
29862"char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);\n"
29863"char8 __ovld __cnfn convert_char8(ushort8);\n"
29864"char8 __ovld __cnfn convert_char8_sat(ushort8);\n"
29865"char8 __ovld __cnfn convert_char8_rte(int8);\n"
29866"char8 __ovld __cnfn convert_char8_sat_rte(int8);\n"
29867"char8 __ovld __cnfn convert_char8_rtz(int8);\n"
29868"char8 __ovld __cnfn convert_char8_sat_rtz(int8);\n"
29869"char8 __ovld __cnfn convert_char8_rtp(int8);\n"
29870"char8 __ovld __cnfn convert_char8_sat_rtp(int8);\n"
29871"char8 __ovld __cnfn convert_char8_rtn(int8);\n"
29872"char8 __ovld __cnfn convert_char8_sat_rtn(int8);\n"
29873"char8 __ovld __cnfn convert_char8(int8);\n"
29874"char8 __ovld __cnfn convert_char8_sat(int8);\n"
29875"char8 __ovld __cnfn convert_char8_rte(uint8);\n"
29876"char8 __ovld __cnfn convert_char8_sat_rte(uint8);\n"
29877"char8 __ovld __cnfn convert_char8_rtz(uint8);\n"
29878"char8 __ovld __cnfn convert_char8_sat_rtz(uint8);\n"
29879"char8 __ovld __cnfn convert_char8_rtp(uint8);\n"
29880"char8 __ovld __cnfn convert_char8_sat_rtp(uint8);\n"
29881"char8 __ovld __cnfn convert_char8_rtn(uint8);\n"
29882"char8 __ovld __cnfn convert_char8_sat_rtn(uint8);\n"
29883"char8 __ovld __cnfn convert_char8(uint8);\n"
29884"char8 __ovld __cnfn convert_char8_sat(uint8);\n"
29885"char8 __ovld __cnfn convert_char8_rte(long8);\n"
29886"char8 __ovld __cnfn convert_char8_sat_rte(long8);\n"
29887"char8 __ovld __cnfn convert_char8_rtz(long8);\n"
29888"char8 __ovld __cnfn convert_char8_sat_rtz(long8);\n"
29889"char8 __ovld __cnfn convert_char8_rtp(long8);\n"
29890"char8 __ovld __cnfn convert_char8_sat_rtp(long8);\n"
29891"char8 __ovld __cnfn convert_char8_rtn(long8);\n"
29892"char8 __ovld __cnfn convert_char8_sat_rtn(long8);\n"
29893"char8 __ovld __cnfn convert_char8(long8);\n"
29894"char8 __ovld __cnfn convert_char8_sat(long8);\n"
29895"char8 __ovld __cnfn convert_char8_rte(ulong8);\n"
29896"char8 __ovld __cnfn convert_char8_sat_rte(ulong8);\n"
29897"char8 __ovld __cnfn convert_char8_rtz(ulong8);\n"
29898"char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);\n"
29899"char8 __ovld __cnfn convert_char8_rtp(ulong8);\n"
29900"char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);\n"
29901"char8 __ovld __cnfn convert_char8_rtn(ulong8);\n"
29902"char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);\n"
29903"char8 __ovld __cnfn convert_char8(ulong8);\n"
29904"char8 __ovld __cnfn convert_char8_sat(ulong8);\n"
29905"char8 __ovld __cnfn convert_char8_rte(float8);\n"
29906"char8 __ovld __cnfn convert_char8_sat_rte(float8);\n"
29907"char8 __ovld __cnfn convert_char8_rtz(float8);\n"
29908"char8 __ovld __cnfn convert_char8_sat_rtz(float8);\n"
29909"char8 __ovld __cnfn convert_char8_rtp(float8);\n"
29910"char8 __ovld __cnfn convert_char8_sat_rtp(float8);\n"
29911"char8 __ovld __cnfn convert_char8_rtn(float8);\n"
29912"char8 __ovld __cnfn convert_char8_sat_rtn(float8);\n"
29913"char8 __ovld __cnfn convert_char8(float8);\n"
29914"char8 __ovld __cnfn convert_char8_sat(float8);\n"
29915"uchar8 __ovld __cnfn convert_uchar8_rte(char8);\n"
29916"uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);\n"
29917"uchar8 __ovld __cnfn convert_uchar8_rtz(char8);\n"
29918"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);\n"
29919"uchar8 __ovld __cnfn convert_uchar8_rtp(char8);\n"
29920"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);\n"
29921"uchar8 __ovld __cnfn convert_uchar8_rtn(char8);\n"
29922"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);\n"
29923"uchar8 __ovld __cnfn convert_uchar8(char8);\n"
29924"uchar8 __ovld __cnfn convert_uchar8_sat(char8);\n"
29925"uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);\n"
29926"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);\n"
29927"uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);\n"
29928"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);\n"
29929"uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);\n"
29930"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);\n"
29931"uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);\n"
29932"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);\n"
29933"uchar8 __ovld __cnfn convert_uchar8(uchar8);\n"
29934"uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);\n"
29935"uchar8 __ovld __cnfn convert_uchar8_rte(short8);\n"
29936"uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);\n"
29937"uchar8 __ovld __cnfn convert_uchar8_rtz(short8);\n"
29938"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);\n"
29939"uchar8 __ovld __cnfn convert_uchar8_rtp(short8);\n"
29940"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);\n"
29941"uchar8 __ovld __cnfn convert_uchar8_rtn(short8);\n"
29942"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);\n"
29943"uchar8 __ovld __cnfn convert_uchar8(short8);\n"
29944"uchar8 __ovld __cnfn convert_uchar8_sat(short8);\n"
29945"uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);\n"
29946"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);\n"
29947"uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);\n"
29948"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);\n"
29949"uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);\n"
29950"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);\n"
29951"uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);\n"
29952"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);\n"
29953"uchar8 __ovld __cnfn convert_uchar8(ushort8);\n"
29954"uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);\n"
29955"uchar8 __ovld __cnfn convert_uchar8_rte(int8);\n"
29956"uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);\n"
29957"uchar8 __ovld __cnfn convert_uchar8_rtz(int8);\n"
29958"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);\n"
29959"uchar8 __ovld __cnfn convert_uchar8_rtp(int8);\n"
29960"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);\n"
29961"uchar8 __ovld __cnfn convert_uchar8_rtn(int8);\n"
29962"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);\n"
29963"uchar8 __ovld __cnfn convert_uchar8(int8);\n"
29964"uchar8 __ovld __cnfn convert_uchar8_sat(int8);\n"
29965"uchar8 __ovld __cnfn convert_uchar8_rte(uint8);\n"
29966"uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);\n"
29967"uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);\n"
29968"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);\n"
29969"uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);\n"
29970"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);\n"
29971"uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);\n"
29972"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);\n"
29973"uchar8 __ovld __cnfn convert_uchar8(uint8);\n"
29974"uchar8 __ovld __cnfn convert_uchar8_sat(uint8);\n"
29975"uchar8 __ovld __cnfn convert_uchar8_rte(long8);\n"
29976"uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);\n"
29977"uchar8 __ovld __cnfn convert_uchar8_rtz(long8);\n"
29978"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);\n"
29979"uchar8 __ovld __cnfn convert_uchar8_rtp(long8);\n"
29980"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);\n"
29981"uchar8 __ovld __cnfn convert_uchar8_rtn(long8);\n"
29982"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);\n"
29983"uchar8 __ovld __cnfn convert_uchar8(long8);\n"
29984"uchar8 __ovld __cnfn convert_uchar8_sat(long8);\n"
29985"uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);\n"
29986"uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);\n"
29987"uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);\n"
29988"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);\n"
29989"uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);\n"
29990"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);\n"
29991"uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);\n"
29992"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);\n"
29993"uchar8 __ovld __cnfn convert_uchar8(ulong8);\n"
29994"uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);\n"
29995"uchar8 __ovld __cnfn convert_uchar8_rte(float8);\n"
29996"uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);\n"
29997"uchar8 __ovld __cnfn convert_uchar8_rtz(float8);\n"
29998"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);\n"
29999"uchar8 __ovld __cnfn convert_uchar8_rtp(float8);\n"
30000"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);\n"
30001"uchar8 __ovld __cnfn convert_uchar8_rtn(float8);\n"
30002"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);\n"
30003"uchar8 __ovld __cnfn convert_uchar8(float8);\n"
30004"uchar8 __ovld __cnfn convert_uchar8_sat(float8);\n"
30005"short8 __ovld __cnfn convert_short8_rte(char8);\n"
30006"short8 __ovld __cnfn convert_short8_sat_rte(char8);\n"
30007"short8 __ovld __cnfn convert_short8_rtz(char8);\n"
30008"short8 __ovld __cnfn convert_short8_sat_rtz(char8);\n"
30009"short8 __ovld __cnfn convert_short8_rtp(char8);\n"
30010"short8 __ovld __cnfn convert_short8_sat_rtp(char8);\n"
30011"short8 __ovld __cnfn convert_short8_rtn(char8);\n"
30012"short8 __ovld __cnfn convert_short8_sat_rtn(char8);\n"
30013"short8 __ovld __cnfn convert_short8(char8);\n"
30014"short8 __ovld __cnfn convert_short8_sat(char8);\n"
30015"short8 __ovld __cnfn convert_short8_rte(uchar8);\n"
30016"short8 __ovld __cnfn convert_short8_sat_rte(uchar8);\n"
30017"short8 __ovld __cnfn convert_short8_rtz(uchar8);\n"
30018"short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);\n"
30019"short8 __ovld __cnfn convert_short8_rtp(uchar8);\n"
30020"short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);\n"
30021"short8 __ovld __cnfn convert_short8_rtn(uchar8);\n"
30022"short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);\n"
30023"short8 __ovld __cnfn convert_short8(uchar8);\n"
30024"short8 __ovld __cnfn convert_short8_sat(uchar8);\n"
30025"short8 __ovld __cnfn convert_short8_rte(short8);\n"
30026"short8 __ovld __cnfn convert_short8_sat_rte(short8);\n"
30027"short8 __ovld __cnfn convert_short8_rtz(short8);\n"
30028"short8 __ovld __cnfn convert_short8_sat_rtz(short8);\n"
30029"short8 __ovld __cnfn convert_short8_rtp(short8);\n"
30030"short8 __ovld __cnfn convert_short8_sat_rtp(short8);\n"
30031"short8 __ovld __cnfn convert_short8_rtn(short8);\n"
30032"short8 __ovld __cnfn convert_short8_sat_rtn(short8);\n"
30033"short8 __ovld __cnfn convert_short8(short8);\n"
30034"short8 __ovld __cnfn convert_short8_sat(short8);\n"
30035"short8 __ovld __cnfn convert_short8_rte(ushort8);\n"
30036"short8 __ovld __cnfn convert_short8_sat_rte(ushort8);\n"
30037"short8 __ovld __cnfn convert_short8_rtz(ushort8);\n"
30038"short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);\n"
30039"short8 __ovld __cnfn convert_short8_rtp(ushort8);\n"
30040"short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);\n"
30041"short8 __ovld __cnfn convert_short8_rtn(ushort8);\n"
30042"short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);\n"
30043"short8 __ovld __cnfn convert_short8(ushort8);\n"
30044"short8 __ovld __cnfn convert_short8_sat(ushort8);\n"
30045"short8 __ovld __cnfn convert_short8_rte(int8);\n"
30046"short8 __ovld __cnfn convert_short8_sat_rte(int8);\n"
30047"short8 __ovld __cnfn convert_short8_rtz(int8);\n"
30048"short8 __ovld __cnfn convert_short8_sat_rtz(int8);\n"
30049"short8 __ovld __cnfn convert_short8_rtp(int8);\n"
30050"short8 __ovld __cnfn convert_short8_sat_rtp(int8);\n"
30051"short8 __ovld __cnfn convert_short8_rtn(int8);\n"
30052"short8 __ovld __cnfn convert_short8_sat_rtn(int8);\n"
30053"short8 __ovld __cnfn convert_short8(int8);\n"
30054"short8 __ovld __cnfn convert_short8_sat(int8);\n"
30055"short8 __ovld __cnfn convert_short8_rte(uint8);\n"
30056"short8 __ovld __cnfn convert_short8_sat_rte(uint8);\n"
30057"short8 __ovld __cnfn convert_short8_rtz(uint8);\n"
30058"short8 __ovld __cnfn convert_short8_sat_rtz(uint8);\n"
30059"short8 __ovld __cnfn convert_short8_rtp(uint8);\n"
30060"short8 __ovld __cnfn convert_short8_sat_rtp(uint8);\n"
30061"short8 __ovld __cnfn convert_short8_rtn(uint8);\n"
30062"short8 __ovld __cnfn convert_short8_sat_rtn(uint8);\n"
30063"short8 __ovld __cnfn convert_short8(uint8);\n"
30064"short8 __ovld __cnfn convert_short8_sat(uint8);\n"
30065"short8 __ovld __cnfn convert_short8_rte(long8);\n"
30066"short8 __ovld __cnfn convert_short8_sat_rte(long8);\n"
30067"short8 __ovld __cnfn convert_short8_rtz(long8);\n"
30068"short8 __ovld __cnfn convert_short8_sat_rtz(long8);\n"
30069"short8 __ovld __cnfn convert_short8_rtp(long8);\n"
30070"short8 __ovld __cnfn convert_short8_sat_rtp(long8);\n"
30071"short8 __ovld __cnfn convert_short8_rtn(long8);\n"
30072"short8 __ovld __cnfn convert_short8_sat_rtn(long8);\n"
30073"short8 __ovld __cnfn convert_short8(long8);\n"
30074"short8 __ovld __cnfn convert_short8_sat(long8);\n"
30075"short8 __ovld __cnfn convert_short8_rte(ulong8);\n"
30076"short8 __ovld __cnfn convert_short8_sat_rte(ulong8);\n"
30077"short8 __ovld __cnfn convert_short8_rtz(ulong8);\n"
30078"short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);\n"
30079"short8 __ovld __cnfn convert_short8_rtp(ulong8);\n"
30080"short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);\n"
30081"short8 __ovld __cnfn convert_short8_rtn(ulong8);\n"
30082"short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);\n"
30083"short8 __ovld __cnfn convert_short8(ulong8);\n"
30084"short8 __ovld __cnfn convert_short8_sat(ulong8);\n"
30085"short8 __ovld __cnfn convert_short8_rte(float8);\n"
30086"short8 __ovld __cnfn convert_short8_sat_rte(float8);\n"
30087"short8 __ovld __cnfn convert_short8_rtz(float8);\n"
30088"short8 __ovld __cnfn convert_short8_sat_rtz(float8);\n"
30089"short8 __ovld __cnfn convert_short8_rtp(float8);\n"
30090"short8 __ovld __cnfn convert_short8_sat_rtp(float8);\n"
30091"short8 __ovld __cnfn convert_short8_rtn(float8);\n"
30092"short8 __ovld __cnfn convert_short8_sat_rtn(float8);\n"
30093"short8 __ovld __cnfn convert_short8(float8);\n"
30094"short8 __ovld __cnfn convert_short8_sat(float8);\n"
30095"ushort8 __ovld __cnfn convert_ushort8_rte(char8);\n"
30096"ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);\n"
30097"ushort8 __ovld __cnfn convert_ushort8_rtz(char8);\n"
30098"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);\n"
30099"ushort8 __ovld __cnfn convert_ushort8_rtp(char8);\n"
30100"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);\n"
30101"ushort8 __ovld __cnfn convert_ushort8_rtn(char8);\n"
30102"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);\n"
30103"ushort8 __ovld __cnfn convert_ushort8(char8);\n"
30104"ushort8 __ovld __cnfn convert_ushort8_sat(char8);\n"
30105"ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);\n"
30106"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);\n"
30107"ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);\n"
30108"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);\n"
30109"ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);\n"
30110"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);\n"
30111"ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);\n"
30112"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);\n"
30113"ushort8 __ovld __cnfn convert_ushort8(uchar8);\n"
30114"ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);\n"
30115"ushort8 __ovld __cnfn convert_ushort8_rte(short8);\n"
30116"ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);\n"
30117"ushort8 __ovld __cnfn convert_ushort8_rtz(short8);\n"
30118"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);\n"
30119"ushort8 __ovld __cnfn convert_ushort8_rtp(short8);\n"
30120"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);\n"
30121"ushort8 __ovld __cnfn convert_ushort8_rtn(short8);\n"
30122"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);\n"
30123"ushort8 __ovld __cnfn convert_ushort8(short8);\n"
30124"ushort8 __ovld __cnfn convert_ushort8_sat(short8);\n"
30125"ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);\n"
30126"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);\n"
30127"ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);\n"
30128"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);\n"
30129"ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);\n"
30130"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);\n"
30131"ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);\n"
30132"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);\n"
30133"ushort8 __ovld __cnfn convert_ushort8(ushort8);\n"
30134"ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);\n"
30135"ushort8 __ovld __cnfn convert_ushort8_rte(int8);\n"
30136"ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);\n"
30137"ushort8 __ovld __cnfn convert_ushort8_rtz(int8);\n"
30138"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);\n"
30139"ushort8 __ovld __cnfn convert_ushort8_rtp(int8);\n"
30140"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);\n"
30141"ushort8 __ovld __cnfn convert_ushort8_rtn(int8);\n"
30142"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);\n"
30143"ushort8 __ovld __cnfn convert_ushort8(int8);\n"
30144"ushort8 __ovld __cnfn convert_ushort8_sat(int8);\n"
30145"ushort8 __ovld __cnfn convert_ushort8_rte(uint8);\n"
30146"ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);\n"
30147"ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);\n"
30148"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);\n"
30149"ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);\n"
30150"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);\n"
30151"ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);\n"
30152"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);\n"
30153"ushort8 __ovld __cnfn convert_ushort8(uint8);\n"
30154"ushort8 __ovld __cnfn convert_ushort8_sat(uint8);\n"
30155"ushort8 __ovld __cnfn convert_ushort8_rte(long8);\n"
30156"ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);\n"
30157"ushort8 __ovld __cnfn convert_ushort8_rtz(long8);\n"
30158"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);\n"
30159"ushort8 __ovld __cnfn convert_ushort8_rtp(long8);\n"
30160"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);\n"
30161"ushort8 __ovld __cnfn convert_ushort8_rtn(long8);\n"
30162"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);\n"
30163"ushort8 __ovld __cnfn convert_ushort8(long8);\n"
30164"ushort8 __ovld __cnfn convert_ushort8_sat(long8);\n"
30165"ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);\n"
30166"ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);\n"
30167"ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);\n"
30168"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);\n"
30169"ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);\n"
30170"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);\n"
30171"ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);\n"
30172"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);\n"
30173"ushort8 __ovld __cnfn convert_ushort8(ulong8);\n"
30174"ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);\n"
30175"ushort8 __ovld __cnfn convert_ushort8_rte(float8);\n"
30176"ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);\n"
30177"ushort8 __ovld __cnfn convert_ushort8_rtz(float8);\n"
30178"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);\n"
30179"ushort8 __ovld __cnfn convert_ushort8_rtp(float8);\n"
30180"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);\n"
30181"ushort8 __ovld __cnfn convert_ushort8_rtn(float8);\n"
30182"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);\n"
30183"ushort8 __ovld __cnfn convert_ushort8(float8);\n"
30184"ushort8 __ovld __cnfn convert_ushort8_sat(float8);\n"
30185"int8 __ovld __cnfn convert_int8_rte(char8);\n"
30186"int8 __ovld __cnfn convert_int8_sat_rte(char8);\n"
30187"int8 __ovld __cnfn convert_int8_rtz(char8);\n"
30188"int8 __ovld __cnfn convert_int8_sat_rtz(char8);\n"
30189"int8 __ovld __cnfn convert_int8_rtp(char8);\n"
30190"int8 __ovld __cnfn convert_int8_sat_rtp(char8);\n"
30191"int8 __ovld __cnfn convert_int8_rtn(char8);\n"
30192"int8 __ovld __cnfn convert_int8_sat_rtn(char8);\n"
30193"int8 __ovld __cnfn convert_int8(char8);\n"
30194"int8 __ovld __cnfn convert_int8_sat(char8);\n"
30195"int8 __ovld __cnfn convert_int8_rte(uchar8);\n"
30196"int8 __ovld __cnfn convert_int8_sat_rte(uchar8);\n"
30197"int8 __ovld __cnfn convert_int8_rtz(uchar8);\n"
30198"int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);\n"
30199"int8 __ovld __cnfn convert_int8_rtp(uchar8);\n"
30200"int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);\n"
30201"int8 __ovld __cnfn convert_int8_rtn(uchar8);\n"
30202"int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);\n"
30203"int8 __ovld __cnfn convert_int8(uchar8);\n"
30204"int8 __ovld __cnfn convert_int8_sat(uchar8);\n"
30205"int8 __ovld __cnfn convert_int8_rte(short8);\n"
30206"int8 __ovld __cnfn convert_int8_sat_rte(short8);\n"
30207"int8 __ovld __cnfn convert_int8_rtz(short8);\n"
30208"int8 __ovld __cnfn convert_int8_sat_rtz(short8);\n"
30209"int8 __ovld __cnfn convert_int8_rtp(short8);\n"
30210"int8 __ovld __cnfn convert_int8_sat_rtp(short8);\n"
30211"int8 __ovld __cnfn convert_int8_rtn(short8);\n"
30212"int8 __ovld __cnfn convert_int8_sat_rtn(short8);\n"
30213"int8 __ovld __cnfn convert_int8(short8);\n"
30214"int8 __ovld __cnfn convert_int8_sat(short8);\n"
30215"int8 __ovld __cnfn convert_int8_rte(ushort8);\n"
30216"int8 __ovld __cnfn convert_int8_sat_rte(ushort8);\n"
30217"int8 __ovld __cnfn convert_int8_rtz(ushort8);\n"
30218"int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);\n"
30219"int8 __ovld __cnfn convert_int8_rtp(ushort8);\n"
30220"int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);\n"
30221"int8 __ovld __cnfn convert_int8_rtn(ushort8);\n"
30222"int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);\n"
30223"int8 __ovld __cnfn convert_int8(ushort8);\n"
30224"int8 __ovld __cnfn convert_int8_sat(ushort8);\n"
30225"int8 __ovld __cnfn convert_int8_rte(int8);\n"
30226"int8 __ovld __cnfn convert_int8_sat_rte(int8);\n"
30227"int8 __ovld __cnfn convert_int8_rtz(int8);\n"
30228"int8 __ovld __cnfn convert_int8_sat_rtz(int8);\n"
30229"int8 __ovld __cnfn convert_int8_rtp(int8);\n"
30230"int8 __ovld __cnfn convert_int8_sat_rtp(int8);\n"
30231"int8 __ovld __cnfn convert_int8_rtn(int8);\n"
30232"int8 __ovld __cnfn convert_int8_sat_rtn(int8);\n"
30233"int8 __ovld __cnfn convert_int8(int8);\n"
30234"int8 __ovld __cnfn convert_int8_sat(int8);\n"
30235"int8 __ovld __cnfn convert_int8_rte(uint8);\n"
30236"int8 __ovld __cnfn convert_int8_sat_rte(uint8);\n"
30237"int8 __ovld __cnfn convert_int8_rtz(uint8);\n"
30238"int8 __ovld __cnfn convert_int8_sat_rtz(uint8);\n"
30239"int8 __ovld __cnfn convert_int8_rtp(uint8);\n"
30240"int8 __ovld __cnfn convert_int8_sat_rtp(uint8);\n"
30241"int8 __ovld __cnfn convert_int8_rtn(uint8);\n"
30242"int8 __ovld __cnfn convert_int8_sat_rtn(uint8);\n"
30243"int8 __ovld __cnfn convert_int8(uint8);\n"
30244"int8 __ovld __cnfn convert_int8_sat(uint8);\n"
30245"int8 __ovld __cnfn convert_int8_rte(long8);\n"
30246"int8 __ovld __cnfn convert_int8_sat_rte(long8);\n"
30247"int8 __ovld __cnfn convert_int8_rtz(long8);\n"
30248"int8 __ovld __cnfn convert_int8_sat_rtz(long8);\n"
30249"int8 __ovld __cnfn convert_int8_rtp(long8);\n"
30250"int8 __ovld __cnfn convert_int8_sat_rtp(long8);\n"
30251"int8 __ovld __cnfn convert_int8_rtn(long8);\n"
30252"int8 __ovld __cnfn convert_int8_sat_rtn(long8);\n"
30253"int8 __ovld __cnfn convert_int8(long8);\n"
30254"int8 __ovld __cnfn convert_int8_sat(long8);\n"
30255"int8 __ovld __cnfn convert_int8_rte(ulong8);\n"
30256"int8 __ovld __cnfn convert_int8_sat_rte(ulong8);\n"
30257"int8 __ovld __cnfn convert_int8_rtz(ulong8);\n"
30258"int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);\n"
30259"int8 __ovld __cnfn convert_int8_rtp(ulong8);\n"
30260"int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);\n"
30261"int8 __ovld __cnfn convert_int8_rtn(ulong8);\n"
30262"int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);\n"
30263"int8 __ovld __cnfn convert_int8(ulong8);\n"
30264"int8 __ovld __cnfn convert_int8_sat(ulong8);\n"
30265"int8 __ovld __cnfn convert_int8_rte(float8);\n"
30266"int8 __ovld __cnfn convert_int8_sat_rte(float8);\n"
30267"int8 __ovld __cnfn convert_int8_rtz(float8);\n"
30268"int8 __ovld __cnfn convert_int8_sat_rtz(float8);\n"
30269"int8 __ovld __cnfn convert_int8_rtp(float8);\n"
30270"int8 __ovld __cnfn convert_int8_sat_rtp(float8);\n"
30271"int8 __ovld __cnfn convert_int8_rtn(float8);\n"
30272"int8 __ovld __cnfn convert_int8_sat_rtn(float8);\n"
30273"int8 __ovld __cnfn convert_int8(float8);\n"
30274"int8 __ovld __cnfn convert_int8_sat(float8);\n"
30275"uint8 __ovld __cnfn convert_uint8_rte(char8);\n"
30276"uint8 __ovld __cnfn convert_uint8_sat_rte(char8);\n"
30277"uint8 __ovld __cnfn convert_uint8_rtz(char8);\n"
30278"uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);\n"
30279"uint8 __ovld __cnfn convert_uint8_rtp(char8);\n"
30280"uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);\n"
30281"uint8 __ovld __cnfn convert_uint8_rtn(char8);\n"
30282"uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);\n"
30283"uint8 __ovld __cnfn convert_uint8(char8);\n"
30284"uint8 __ovld __cnfn convert_uint8_sat(char8);\n"
30285"uint8 __ovld __cnfn convert_uint8_rte(uchar8);\n"
30286"uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);\n"
30287"uint8 __ovld __cnfn convert_uint8_rtz(uchar8);\n"
30288"uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);\n"
30289"uint8 __ovld __cnfn convert_uint8_rtp(uchar8);\n"
30290"uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);\n"
30291"uint8 __ovld __cnfn convert_uint8_rtn(uchar8);\n"
30292"uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);\n"
30293"uint8 __ovld __cnfn convert_uint8(uchar8);\n"
30294"uint8 __ovld __cnfn convert_uint8_sat(uchar8);\n"
30295"uint8 __ovld __cnfn convert_uint8_rte(short8);\n"
30296"uint8 __ovld __cnfn convert_uint8_sat_rte(short8);\n"
30297"uint8 __ovld __cnfn convert_uint8_rtz(short8);\n"
30298"uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);\n"
30299"uint8 __ovld __cnfn convert_uint8_rtp(short8);\n"
30300"uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);\n"
30301"uint8 __ovld __cnfn convert_uint8_rtn(short8);\n"
30302"uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);\n"
30303"uint8 __ovld __cnfn convert_uint8(short8);\n"
30304"uint8 __ovld __cnfn convert_uint8_sat(short8);\n"
30305"uint8 __ovld __cnfn convert_uint8_rte(ushort8);\n"
30306"uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);\n"
30307"uint8 __ovld __cnfn convert_uint8_rtz(ushort8);\n"
30308"uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);\n"
30309"uint8 __ovld __cnfn convert_uint8_rtp(ushort8);\n"
30310"uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);\n"
30311"uint8 __ovld __cnfn convert_uint8_rtn(ushort8);\n"
30312"uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);\n"
30313"uint8 __ovld __cnfn convert_uint8(ushort8);\n"
30314"uint8 __ovld __cnfn convert_uint8_sat(ushort8);\n"
30315"uint8 __ovld __cnfn convert_uint8_rte(int8);\n"
30316"uint8 __ovld __cnfn convert_uint8_sat_rte(int8);\n"
30317"uint8 __ovld __cnfn convert_uint8_rtz(int8);\n"
30318"uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);\n"
30319"uint8 __ovld __cnfn convert_uint8_rtp(int8);\n"
30320"uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);\n"
30321"uint8 __ovld __cnfn convert_uint8_rtn(int8);\n"
30322"uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);\n"
30323"uint8 __ovld __cnfn convert_uint8(int8);\n"
30324"uint8 __ovld __cnfn convert_uint8_sat(int8);\n"
30325"uint8 __ovld __cnfn convert_uint8_rte(uint8);\n"
30326"uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);\n"
30327"uint8 __ovld __cnfn convert_uint8_rtz(uint8);\n"
30328"uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);\n"
30329"uint8 __ovld __cnfn convert_uint8_rtp(uint8);\n"
30330"uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);\n"
30331"uint8 __ovld __cnfn convert_uint8_rtn(uint8);\n"
30332"uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);\n"
30333"uint8 __ovld __cnfn convert_uint8(uint8);\n"
30334"uint8 __ovld __cnfn convert_uint8_sat(uint8);\n"
30335"uint8 __ovld __cnfn convert_uint8_rte(long8);\n"
30336"uint8 __ovld __cnfn convert_uint8_sat_rte(long8);\n"
30337"uint8 __ovld __cnfn convert_uint8_rtz(long8);\n"
30338"uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);\n"
30339"uint8 __ovld __cnfn convert_uint8_rtp(long8);\n"
30340"uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);\n"
30341"uint8 __ovld __cnfn convert_uint8_rtn(long8);\n"
30342"uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);\n"
30343"uint8 __ovld __cnfn convert_uint8(long8);\n"
30344"uint8 __ovld __cnfn convert_uint8_sat(long8);\n"
30345"uint8 __ovld __cnfn convert_uint8_rte(ulong8);\n"
30346"uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);\n"
30347"uint8 __ovld __cnfn convert_uint8_rtz(ulong8);\n"
30348"uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);\n"
30349"uint8 __ovld __cnfn convert_uint8_rtp(ulong8);\n"
30350"uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);\n"
30351"uint8 __ovld __cnfn convert_uint8_rtn(ulong8);\n"
30352"uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);\n"
30353"uint8 __ovld __cnfn convert_uint8(ulong8);\n"
30354"uint8 __ovld __cnfn convert_uint8_sat(ulong8);\n"
30355"uint8 __ovld __cnfn convert_uint8_rte(float8);\n"
30356"uint8 __ovld __cnfn convert_uint8_sat_rte(float8);\n"
30357"uint8 __ovld __cnfn convert_uint8_rtz(float8);\n"
30358"uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);\n"
30359"uint8 __ovld __cnfn convert_uint8_rtp(float8);\n"
30360"uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);\n"
30361"uint8 __ovld __cnfn convert_uint8_rtn(float8);\n"
30362"uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);\n"
30363"uint8 __ovld __cnfn convert_uint8(float8);\n"
30364"uint8 __ovld __cnfn convert_uint8_sat(float8);\n"
30365"long8 __ovld __cnfn convert_long8_rte(char8);\n"
30366"long8 __ovld __cnfn convert_long8_sat_rte(char8);\n"
30367"long8 __ovld __cnfn convert_long8_rtz(char8);\n"
30368"long8 __ovld __cnfn convert_long8_sat_rtz(char8);\n"
30369"long8 __ovld __cnfn convert_long8_rtp(char8);\n"
30370"long8 __ovld __cnfn convert_long8_sat_rtp(char8);\n"
30371"long8 __ovld __cnfn convert_long8_rtn(char8);\n"
30372"long8 __ovld __cnfn convert_long8_sat_rtn(char8);\n"
30373"long8 __ovld __cnfn convert_long8(char8);\n"
30374"long8 __ovld __cnfn convert_long8_sat(char8);\n"
30375"long8 __ovld __cnfn convert_long8_rte(uchar8);\n"
30376"long8 __ovld __cnfn convert_long8_sat_rte(uchar8);\n"
30377"long8 __ovld __cnfn convert_long8_rtz(uchar8);\n"
30378"long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);\n"
30379"long8 __ovld __cnfn convert_long8_rtp(uchar8);\n"
30380"long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);\n"
30381"long8 __ovld __cnfn convert_long8_rtn(uchar8);\n"
30382"long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);\n"
30383"long8 __ovld __cnfn convert_long8(uchar8);\n"
30384"long8 __ovld __cnfn convert_long8_sat(uchar8);\n"
30385"long8 __ovld __cnfn convert_long8_rte(short8);\n"
30386"long8 __ovld __cnfn convert_long8_sat_rte(short8);\n"
30387"long8 __ovld __cnfn convert_long8_rtz(short8);\n"
30388"long8 __ovld __cnfn convert_long8_sat_rtz(short8);\n"
30389"long8 __ovld __cnfn convert_long8_rtp(short8);\n"
30390"long8 __ovld __cnfn convert_long8_sat_rtp(short8);\n"
30391"long8 __ovld __cnfn convert_long8_rtn(short8);\n"
30392"long8 __ovld __cnfn convert_long8_sat_rtn(short8);\n"
30393"long8 __ovld __cnfn convert_long8(short8);\n"
30394"long8 __ovld __cnfn convert_long8_sat(short8);\n"
30395"long8 __ovld __cnfn convert_long8_rte(ushort8);\n"
30396"long8 __ovld __cnfn convert_long8_sat_rte(ushort8);\n"
30397"long8 __ovld __cnfn convert_long8_rtz(ushort8);\n"
30398"long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);\n"
30399"long8 __ovld __cnfn convert_long8_rtp(ushort8);\n"
30400"long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);\n"
30401"long8 __ovld __cnfn convert_long8_rtn(ushort8);\n"
30402"long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);\n"
30403"long8 __ovld __cnfn convert_long8(ushort8);\n"
30404"long8 __ovld __cnfn convert_long8_sat(ushort8);\n"
30405"long8 __ovld __cnfn convert_long8_rte(int8);\n"
30406"long8 __ovld __cnfn convert_long8_sat_rte(int8);\n"
30407"long8 __ovld __cnfn convert_long8_rtz(int8);\n"
30408"long8 __ovld __cnfn convert_long8_sat_rtz(int8);\n"
30409"long8 __ovld __cnfn convert_long8_rtp(int8);\n"
30410"long8 __ovld __cnfn convert_long8_sat_rtp(int8);\n"
30411"long8 __ovld __cnfn convert_long8_rtn(int8);\n"
30412"long8 __ovld __cnfn convert_long8_sat_rtn(int8);\n"
30413"long8 __ovld __cnfn convert_long8(int8);\n"
30414"long8 __ovld __cnfn convert_long8_sat(int8);\n"
30415"long8 __ovld __cnfn convert_long8_rte(uint8);\n"
30416"long8 __ovld __cnfn convert_long8_sat_rte(uint8);\n"
30417"long8 __ovld __cnfn convert_long8_rtz(uint8);\n"
30418"long8 __ovld __cnfn convert_long8_sat_rtz(uint8);\n"
30419"long8 __ovld __cnfn convert_long8_rtp(uint8);\n"
30420"long8 __ovld __cnfn convert_long8_sat_rtp(uint8);\n"
30421"long8 __ovld __cnfn convert_long8_rtn(uint8);\n"
30422"long8 __ovld __cnfn convert_long8_sat_rtn(uint8);\n"
30423"long8 __ovld __cnfn convert_long8(uint8);\n"
30424"long8 __ovld __cnfn convert_long8_sat(uint8);\n"
30425"long8 __ovld __cnfn convert_long8_rte(long8);\n"
30426"long8 __ovld __cnfn convert_long8_sat_rte(long8);\n"
30427"long8 __ovld __cnfn convert_long8_rtz(long8);\n"
30428"long8 __ovld __cnfn convert_long8_sat_rtz(long8);\n"
30429"long8 __ovld __cnfn convert_long8_rtp(long8);\n"
30430"long8 __ovld __cnfn convert_long8_sat_rtp(long8);\n"
30431"long8 __ovld __cnfn convert_long8_rtn(long8);\n"
30432"long8 __ovld __cnfn convert_long8_sat_rtn(long8);\n"
30433"long8 __ovld __cnfn convert_long8(long8);\n"
30434"long8 __ovld __cnfn convert_long8_sat(long8);\n"
30435"long8 __ovld __cnfn convert_long8_rte(ulong8);\n"
30436"long8 __ovld __cnfn convert_long8_sat_rte(ulong8);\n"
30437"long8 __ovld __cnfn convert_long8_rtz(ulong8);\n"
30438"long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);\n"
30439"long8 __ovld __cnfn convert_long8_rtp(ulong8);\n"
30440"long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);\n"
30441"long8 __ovld __cnfn convert_long8_rtn(ulong8);\n"
30442"long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);\n"
30443"long8 __ovld __cnfn convert_long8(ulong8);\n"
30444"long8 __ovld __cnfn convert_long8_sat(ulong8);\n"
30445"long8 __ovld __cnfn convert_long8_rte(float8);\n"
30446"long8 __ovld __cnfn convert_long8_sat_rte(float8);\n"
30447"long8 __ovld __cnfn convert_long8_rtz(float8);\n"
30448"long8 __ovld __cnfn convert_long8_sat_rtz(float8);\n"
30449"long8 __ovld __cnfn convert_long8_rtp(float8);\n"
30450"long8 __ovld __cnfn convert_long8_sat_rtp(float8);\n"
30451"long8 __ovld __cnfn convert_long8_rtn(float8);\n"
30452"long8 __ovld __cnfn convert_long8_sat_rtn(float8);\n"
30453"long8 __ovld __cnfn convert_long8(float8);\n"
30454"long8 __ovld __cnfn convert_long8_sat(float8);\n"
30455"ulong8 __ovld __cnfn convert_ulong8_rte(char8);\n"
30456"ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);\n"
30457"ulong8 __ovld __cnfn convert_ulong8_rtz(char8);\n"
30458"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);\n"
30459"ulong8 __ovld __cnfn convert_ulong8_rtp(char8);\n"
30460"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);\n"
30461"ulong8 __ovld __cnfn convert_ulong8_rtn(char8);\n"
30462"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);\n"
30463"ulong8 __ovld __cnfn convert_ulong8(char8);\n"
30464"ulong8 __ovld __cnfn convert_ulong8_sat(char8);\n"
30465"ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);\n"
30466"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);\n"
30467"ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);\n"
30468"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);\n"
30469"ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);\n"
30470"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);\n"
30471"ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);\n"
30472"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);\n"
30473"ulong8 __ovld __cnfn convert_ulong8(uchar8);\n"
30474"ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);\n"
30475"ulong8 __ovld __cnfn convert_ulong8_rte(short8);\n"
30476"ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);\n"
30477"ulong8 __ovld __cnfn convert_ulong8_rtz(short8);\n"
30478"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);\n"
30479"ulong8 __ovld __cnfn convert_ulong8_rtp(short8);\n"
30480"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);\n"
30481"ulong8 __ovld __cnfn convert_ulong8_rtn(short8);\n"
30482"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);\n"
30483"ulong8 __ovld __cnfn convert_ulong8(short8);\n"
30484"ulong8 __ovld __cnfn convert_ulong8_sat(short8);\n"
30485"ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);\n"
30486"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);\n"
30487"ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);\n"
30488"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);\n"
30489"ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);\n"
30490"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);\n"
30491"ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);\n"
30492"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);\n"
30493"ulong8 __ovld __cnfn convert_ulong8(ushort8);\n"
30494"ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);\n"
30495"ulong8 __ovld __cnfn convert_ulong8_rte(int8);\n"
30496"ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);\n"
30497"ulong8 __ovld __cnfn convert_ulong8_rtz(int8);\n"
30498"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);\n"
30499"ulong8 __ovld __cnfn convert_ulong8_rtp(int8);\n"
30500"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);\n"
30501"ulong8 __ovld __cnfn convert_ulong8_rtn(int8);\n"
30502"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);\n"
30503"ulong8 __ovld __cnfn convert_ulong8(int8);\n"
30504"ulong8 __ovld __cnfn convert_ulong8_sat(int8);\n"
30505"ulong8 __ovld __cnfn convert_ulong8_rte(uint8);\n"
30506"ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);\n"
30507"ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);\n"
30508"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);\n"
30509"ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);\n"
30510"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);\n"
30511"ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);\n"
30512"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);\n"
30513"ulong8 __ovld __cnfn convert_ulong8(uint8);\n"
30514"ulong8 __ovld __cnfn convert_ulong8_sat(uint8);\n"
30515"ulong8 __ovld __cnfn convert_ulong8_rte(long8);\n"
30516"ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);\n"
30517"ulong8 __ovld __cnfn convert_ulong8_rtz(long8);\n"
30518"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);\n"
30519"ulong8 __ovld __cnfn convert_ulong8_rtp(long8);\n"
30520"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);\n"
30521"ulong8 __ovld __cnfn convert_ulong8_rtn(long8);\n"
30522"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);\n"
30523"ulong8 __ovld __cnfn convert_ulong8(long8);\n"
30524"ulong8 __ovld __cnfn convert_ulong8_sat(long8);\n"
30525"ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);\n"
30526"ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);\n"
30527"ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);\n"
30528"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);\n"
30529"ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);\n"
30530"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);\n"
30531"ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);\n"
30532"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);\n"
30533"ulong8 __ovld __cnfn convert_ulong8(ulong8);\n"
30534"ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);\n"
30535"ulong8 __ovld __cnfn convert_ulong8_rte(float8);\n"
30536"ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);\n"
30537"ulong8 __ovld __cnfn convert_ulong8_rtz(float8);\n"
30538"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);\n"
30539"ulong8 __ovld __cnfn convert_ulong8_rtp(float8);\n"
30540"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);\n"
30541"ulong8 __ovld __cnfn convert_ulong8_rtn(float8);\n"
30542"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);\n"
30543"ulong8 __ovld __cnfn convert_ulong8(float8);\n"
30544"ulong8 __ovld __cnfn convert_ulong8_sat(float8);\n"
30545"float8 __ovld __cnfn convert_float8_rte(char8);\n"
30546"float8 __ovld __cnfn convert_float8_rtz(char8);\n"
30547"float8 __ovld __cnfn convert_float8_rtp(char8);\n"
30548"float8 __ovld __cnfn convert_float8_rtn(char8);\n"
30549"float8 __ovld __cnfn convert_float8(char8);\n"
30550"float8 __ovld __cnfn convert_float8_rte(uchar8);\n"
30551"float8 __ovld __cnfn convert_float8_rtz(uchar8);\n"
30552"float8 __ovld __cnfn convert_float8_rtp(uchar8);\n"
30553"float8 __ovld __cnfn convert_float8_rtn(uchar8);\n"
30554"float8 __ovld __cnfn convert_float8(uchar8);\n"
30555"float8 __ovld __cnfn convert_float8_rte(short8);\n"
30556"float8 __ovld __cnfn convert_float8_rtz(short8);\n"
30557"float8 __ovld __cnfn convert_float8_rtp(short8);\n"
30558"float8 __ovld __cnfn convert_float8_rtn(short8);\n"
30559"float8 __ovld __cnfn convert_float8(short8);\n"
30560"float8 __ovld __cnfn convert_float8_rte(ushort8);\n"
30561"float8 __ovld __cnfn convert_float8_rtz(ushort8);\n"
30562"float8 __ovld __cnfn convert_float8_rtp(ushort8);\n"
30563"float8 __ovld __cnfn convert_float8_rtn(ushort8);\n"
30564"float8 __ovld __cnfn convert_float8(ushort8);\n"
30565"float8 __ovld __cnfn convert_float8_rte(int8);\n"
30566"float8 __ovld __cnfn convert_float8_rtz(int8);\n"
30567"float8 __ovld __cnfn convert_float8_rtp(int8);\n"
30568"float8 __ovld __cnfn convert_float8_rtn(int8);\n"
30569"float8 __ovld __cnfn convert_float8(int8);\n"
30570"float8 __ovld __cnfn convert_float8_rte(uint8);\n"
30571"float8 __ovld __cnfn convert_float8_rtz(uint8);\n"
30572"float8 __ovld __cnfn convert_float8_rtp(uint8);\n"
30573"float8 __ovld __cnfn convert_float8_rtn(uint8);\n"
30574"float8 __ovld __cnfn convert_float8(uint8);\n"
30575"float8 __ovld __cnfn convert_float8_rte(long8);\n"
30576"float8 __ovld __cnfn convert_float8_rtz(long8);\n"
30577"float8 __ovld __cnfn convert_float8_rtp(long8);\n"
30578"float8 __ovld __cnfn convert_float8_rtn(long8);\n"
30579"float8 __ovld __cnfn convert_float8(long8);\n"
30580"float8 __ovld __cnfn convert_float8_rte(ulong8);\n"
30581"float8 __ovld __cnfn convert_float8_rtz(ulong8);\n"
30582"float8 __ovld __cnfn convert_float8_rtp(ulong8);\n"
30583"float8 __ovld __cnfn convert_float8_rtn(ulong8);\n"
30584"float8 __ovld __cnfn convert_float8(ulong8);\n"
30585"float8 __ovld __cnfn convert_float8_rte(float8);\n"
30586"float8 __ovld __cnfn convert_float8_rtz(float8);\n"
30587"float8 __ovld __cnfn convert_float8_rtp(float8);\n"
30588"float8 __ovld __cnfn convert_float8_rtn(float8);\n"
30589"float8 __ovld __cnfn convert_float8(float8);\n"
30590"char16 __ovld __cnfn convert_char16_rte(char16);\n"
30591"char16 __ovld __cnfn convert_char16_sat_rte(char16);\n"
30592"char16 __ovld __cnfn convert_char16_rtz(char16);\n"
30593"char16 __ovld __cnfn convert_char16_sat_rtz(char16);\n"
30594"char16 __ovld __cnfn convert_char16_rtp(char16);\n"
30595"char16 __ovld __cnfn convert_char16_sat_rtp(char16);\n"
30596"char16 __ovld __cnfn convert_char16_rtn(char16);\n"
30597"char16 __ovld __cnfn convert_char16_sat_rtn(char16);\n"
30598"char16 __ovld __cnfn convert_char16(char16);\n"
30599"char16 __ovld __cnfn convert_char16_sat(char16);\n"
30600"char16 __ovld __cnfn convert_char16_rte(uchar16);\n"
30601"char16 __ovld __cnfn convert_char16_sat_rte(uchar16);\n"
30602"char16 __ovld __cnfn convert_char16_rtz(uchar16);\n"
30603"char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);\n"
30604"char16 __ovld __cnfn convert_char16_rtp(uchar16);\n"
30605"char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);\n"
30606"char16 __ovld __cnfn convert_char16_rtn(uchar16);\n"
30607"char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);\n"
30608"char16 __ovld __cnfn convert_char16(uchar16);\n"
30609"char16 __ovld __cnfn convert_char16_sat(uchar16);\n"
30610"char16 __ovld __cnfn convert_char16_rte(short16);\n"
30611"char16 __ovld __cnfn convert_char16_sat_rte(short16);\n"
30612"char16 __ovld __cnfn convert_char16_rtz(short16);\n"
30613"char16 __ovld __cnfn convert_char16_sat_rtz(short16);\n"
30614"char16 __ovld __cnfn convert_char16_rtp(short16);\n"
30615"char16 __ovld __cnfn convert_char16_sat_rtp(short16);\n"
30616"char16 __ovld __cnfn convert_char16_rtn(short16);\n"
30617"char16 __ovld __cnfn convert_char16_sat_rtn(short16);\n"
30618"char16 __ovld __cnfn convert_char16(short16);\n"
30619"char16 __ovld __cnfn convert_char16_sat(short16);\n"
30620"char16 __ovld __cnfn convert_char16_rte(ushort16);\n"
30621"char16 __ovld __cnfn convert_char16_sat_rte(ushort16);\n"
30622"char16 __ovld __cnfn convert_char16_rtz(ushort16);\n"
30623"char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);\n"
30624"char16 __ovld __cnfn convert_char16_rtp(ushort16);\n"
30625"char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);\n"
30626"char16 __ovld __cnfn convert_char16_rtn(ushort16);\n"
30627"char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);\n"
30628"char16 __ovld __cnfn convert_char16(ushort16);\n"
30629"char16 __ovld __cnfn convert_char16_sat(ushort16);\n"
30630"char16 __ovld __cnfn convert_char16_rte(int16);\n"
30631"char16 __ovld __cnfn convert_char16_sat_rte(int16);\n"
30632"char16 __ovld __cnfn convert_char16_rtz(int16);\n"
30633"char16 __ovld __cnfn convert_char16_sat_rtz(int16);\n"
30634"char16 __ovld __cnfn convert_char16_rtp(int16);\n"
30635"char16 __ovld __cnfn convert_char16_sat_rtp(int16);\n"
30636"char16 __ovld __cnfn convert_char16_rtn(int16);\n"
30637"char16 __ovld __cnfn convert_char16_sat_rtn(int16);\n"
30638"char16 __ovld __cnfn convert_char16(int16);\n"
30639"char16 __ovld __cnfn convert_char16_sat(int16);\n"
30640"char16 __ovld __cnfn convert_char16_rte(uint16);\n"
30641"char16 __ovld __cnfn convert_char16_sat_rte(uint16);\n"
30642"char16 __ovld __cnfn convert_char16_rtz(uint16);\n"
30643"char16 __ovld __cnfn convert_char16_sat_rtz(uint16);\n"
30644"char16 __ovld __cnfn convert_char16_rtp(uint16);\n"
30645"char16 __ovld __cnfn convert_char16_sat_rtp(uint16);\n"
30646"char16 __ovld __cnfn convert_char16_rtn(uint16);\n"
30647"char16 __ovld __cnfn convert_char16_sat_rtn(uint16);\n"
30648"char16 __ovld __cnfn convert_char16(uint16);\n"
30649"char16 __ovld __cnfn convert_char16_sat(uint16);\n"
30650"char16 __ovld __cnfn convert_char16_rte(long16);\n"
30651"char16 __ovld __cnfn convert_char16_sat_rte(long16);\n"
30652"char16 __ovld __cnfn convert_char16_rtz(long16);\n"
30653"char16 __ovld __cnfn convert_char16_sat_rtz(long16);\n"
30654"char16 __ovld __cnfn convert_char16_rtp(long16);\n"
30655"char16 __ovld __cnfn convert_char16_sat_rtp(long16);\n"
30656"char16 __ovld __cnfn convert_char16_rtn(long16);\n"
30657"char16 __ovld __cnfn convert_char16_sat_rtn(long16);\n"
30658"char16 __ovld __cnfn convert_char16(long16);\n"
30659"char16 __ovld __cnfn convert_char16_sat(long16);\n"
30660"char16 __ovld __cnfn convert_char16_rte(ulong16);\n"
30661"char16 __ovld __cnfn convert_char16_sat_rte(ulong16);\n"
30662"char16 __ovld __cnfn convert_char16_rtz(ulong16);\n"
30663"char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);\n"
30664"char16 __ovld __cnfn convert_char16_rtp(ulong16);\n"
30665"char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);\n"
30666"char16 __ovld __cnfn convert_char16_rtn(ulong16);\n"
30667"char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);\n"
30668"char16 __ovld __cnfn convert_char16(ulong16);\n"
30669"char16 __ovld __cnfn convert_char16_sat(ulong16);\n"
30670"char16 __ovld __cnfn convert_char16_rte(float16);\n"
30671"char16 __ovld __cnfn convert_char16_sat_rte(float16);\n"
30672"char16 __ovld __cnfn convert_char16_rtz(float16);\n"
30673"char16 __ovld __cnfn convert_char16_sat_rtz(float16);\n"
30674"char16 __ovld __cnfn convert_char16_rtp(float16);\n"
30675"char16 __ovld __cnfn convert_char16_sat_rtp(float16);\n"
30676"char16 __ovld __cnfn convert_char16_rtn(float16);\n"
30677"char16 __ovld __cnfn convert_char16_sat_rtn(float16);\n"
30678"char16 __ovld __cnfn convert_char16(float16);\n"
30679"char16 __ovld __cnfn convert_char16_sat(float16);\n"
30680"uchar16 __ovld __cnfn convert_uchar16_rte(char16);\n"
30681"uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);\n"
30682"uchar16 __ovld __cnfn convert_uchar16_rtz(char16);\n"
30683"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);\n"
30684"uchar16 __ovld __cnfn convert_uchar16_rtp(char16);\n"
30685"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);\n"
30686"uchar16 __ovld __cnfn convert_uchar16_rtn(char16);\n"
30687"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);\n"
30688"uchar16 __ovld __cnfn convert_uchar16(char16);\n"
30689"uchar16 __ovld __cnfn convert_uchar16_sat(char16);\n"
30690"uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);\n"
30691"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);\n"
30692"uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);\n"
30693"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);\n"
30694"uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);\n"
30695"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);\n"
30696"uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);\n"
30697"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);\n"
30698"uchar16 __ovld __cnfn convert_uchar16(uchar16);\n"
30699"uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);\n"
30700"uchar16 __ovld __cnfn convert_uchar16_rte(short16);\n"
30701"uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);\n"
30702"uchar16 __ovld __cnfn convert_uchar16_rtz(short16);\n"
30703"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);\n"
30704"uchar16 __ovld __cnfn convert_uchar16_rtp(short16);\n"
30705"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);\n"
30706"uchar16 __ovld __cnfn convert_uchar16_rtn(short16);\n"
30707"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);\n"
30708"uchar16 __ovld __cnfn convert_uchar16(short16);\n"
30709"uchar16 __ovld __cnfn convert_uchar16_sat(short16);\n"
30710"uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);\n"
30711"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);\n"
30712"uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);\n"
30713"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);\n"
30714"uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);\n"
30715"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);\n"
30716"uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);\n"
30717"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);\n"
30718"uchar16 __ovld __cnfn convert_uchar16(ushort16);\n"
30719"uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);\n"
30720"uchar16 __ovld __cnfn convert_uchar16_rte(int16);\n"
30721"uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);\n"
30722"uchar16 __ovld __cnfn convert_uchar16_rtz(int16);\n"
30723"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);\n"
30724"uchar16 __ovld __cnfn convert_uchar16_rtp(int16);\n"
30725"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);\n"
30726"uchar16 __ovld __cnfn convert_uchar16_rtn(int16);\n"
30727"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);\n"
30728"uchar16 __ovld __cnfn convert_uchar16(int16);\n"
30729"uchar16 __ovld __cnfn convert_uchar16_sat(int16);\n"
30730"uchar16 __ovld __cnfn convert_uchar16_rte(uint16);\n"
30731"uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);\n"
30732"uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);\n"
30733"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);\n"
30734"uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);\n"
30735"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);\n"
30736"uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);\n"
30737"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);\n"
30738"uchar16 __ovld __cnfn convert_uchar16(uint16);\n"
30739"uchar16 __ovld __cnfn convert_uchar16_sat(uint16);\n"
30740"uchar16 __ovld __cnfn convert_uchar16_rte(long16);\n"
30741"uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);\n"
30742"uchar16 __ovld __cnfn convert_uchar16_rtz(long16);\n"
30743"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);\n"
30744"uchar16 __ovld __cnfn convert_uchar16_rtp(long16);\n"
30745"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);\n"
30746"uchar16 __ovld __cnfn convert_uchar16_rtn(long16);\n"
30747"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);\n"
30748"uchar16 __ovld __cnfn convert_uchar16(long16);\n"
30749"uchar16 __ovld __cnfn convert_uchar16_sat(long16);\n"
30750"uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);\n"
30751"uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);\n"
30752"uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);\n"
30753"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);\n"
30754"uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);\n"
30755"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);\n"
30756"uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);\n"
30757"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);\n"
30758"uchar16 __ovld __cnfn convert_uchar16(ulong16);\n"
30759"uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);\n"
30760"uchar16 __ovld __cnfn convert_uchar16_rte(float16);\n"
30761"uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);\n"
30762"uchar16 __ovld __cnfn convert_uchar16_rtz(float16);\n"
30763"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);\n"
30764"uchar16 __ovld __cnfn convert_uchar16_rtp(float16);\n"
30765"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);\n"
30766"uchar16 __ovld __cnfn convert_uchar16_rtn(float16);\n"
30767"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);\n"
30768"uchar16 __ovld __cnfn convert_uchar16(float16);\n"
30769"uchar16 __ovld __cnfn convert_uchar16_sat(float16);\n"
30770"short16 __ovld __cnfn convert_short16_rte(char16);\n"
30771"short16 __ovld __cnfn convert_short16_sat_rte(char16);\n"
30772"short16 __ovld __cnfn convert_short16_rtz(char16);\n"
30773"short16 __ovld __cnfn convert_short16_sat_rtz(char16);\n"
30774"short16 __ovld __cnfn convert_short16_rtp(char16);\n"
30775"short16 __ovld __cnfn convert_short16_sat_rtp(char16);\n"
30776"short16 __ovld __cnfn convert_short16_rtn(char16);\n"
30777"short16 __ovld __cnfn convert_short16_sat_rtn(char16);\n"
30778"short16 __ovld __cnfn convert_short16(char16);\n"
30779"short16 __ovld __cnfn convert_short16_sat(char16);\n"
30780"short16 __ovld __cnfn convert_short16_rte(uchar16);\n"
30781"short16 __ovld __cnfn convert_short16_sat_rte(uchar16);\n"
30782"short16 __ovld __cnfn convert_short16_rtz(uchar16);\n"
30783"short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);\n"
30784"short16 __ovld __cnfn convert_short16_rtp(uchar16);\n"
30785"short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);\n"
30786"short16 __ovld __cnfn convert_short16_rtn(uchar16);\n"
30787"short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);\n"
30788"short16 __ovld __cnfn convert_short16(uchar16);\n"
30789"short16 __ovld __cnfn convert_short16_sat(uchar16);\n"
30790"short16 __ovld __cnfn convert_short16_rte(short16);\n"
30791"short16 __ovld __cnfn convert_short16_sat_rte(short16);\n"
30792"short16 __ovld __cnfn convert_short16_rtz(short16);\n"
30793"short16 __ovld __cnfn convert_short16_sat_rtz(short16);\n"
30794"short16 __ovld __cnfn convert_short16_rtp(short16);\n"
30795"short16 __ovld __cnfn convert_short16_sat_rtp(short16);\n"
30796"short16 __ovld __cnfn convert_short16_rtn(short16);\n"
30797"short16 __ovld __cnfn convert_short16_sat_rtn(short16);\n"
30798"short16 __ovld __cnfn convert_short16(short16);\n"
30799"short16 __ovld __cnfn convert_short16_sat(short16);\n"
30800"short16 __ovld __cnfn convert_short16_rte(ushort16);\n"
30801"short16 __ovld __cnfn convert_short16_sat_rte(ushort16);\n"
30802"short16 __ovld __cnfn convert_short16_rtz(ushort16);\n"
30803"short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);\n"
30804"short16 __ovld __cnfn convert_short16_rtp(ushort16);\n"
30805"short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);\n"
30806"short16 __ovld __cnfn convert_short16_rtn(ushort16);\n"
30807"short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);\n"
30808"short16 __ovld __cnfn convert_short16(ushort16);\n"
30809"short16 __ovld __cnfn convert_short16_sat(ushort16);\n"
30810"short16 __ovld __cnfn convert_short16_rte(int16);\n"
30811"short16 __ovld __cnfn convert_short16_sat_rte(int16);\n"
30812"short16 __ovld __cnfn convert_short16_rtz(int16);\n"
30813"short16 __ovld __cnfn convert_short16_sat_rtz(int16);\n"
30814"short16 __ovld __cnfn convert_short16_rtp(int16);\n"
30815"short16 __ovld __cnfn convert_short16_sat_rtp(int16);\n"
30816"short16 __ovld __cnfn convert_short16_rtn(int16);\n"
30817"short16 __ovld __cnfn convert_short16_sat_rtn(int16);\n"
30818"short16 __ovld __cnfn convert_short16(int16);\n"
30819"short16 __ovld __cnfn convert_short16_sat(int16);\n"
30820"short16 __ovld __cnfn convert_short16_rte(uint16);\n"
30821"short16 __ovld __cnfn convert_short16_sat_rte(uint16);\n"
30822"short16 __ovld __cnfn convert_short16_rtz(uint16);\n"
30823"short16 __ovld __cnfn convert_short16_sat_rtz(uint16);\n"
30824"short16 __ovld __cnfn convert_short16_rtp(uint16);\n"
30825"short16 __ovld __cnfn convert_short16_sat_rtp(uint16);\n"
30826"short16 __ovld __cnfn convert_short16_rtn(uint16);\n"
30827"short16 __ovld __cnfn convert_short16_sat_rtn(uint16);\n"
30828"short16 __ovld __cnfn convert_short16(uint16);\n"
30829"short16 __ovld __cnfn convert_short16_sat(uint16);\n"
30830"short16 __ovld __cnfn convert_short16_rte(long16);\n"
30831"short16 __ovld __cnfn convert_short16_sat_rte(long16);\n"
30832"short16 __ovld __cnfn convert_short16_rtz(long16);\n"
30833"short16 __ovld __cnfn convert_short16_sat_rtz(long16);\n"
30834"short16 __ovld __cnfn convert_short16_rtp(long16);\n"
30835"short16 __ovld __cnfn convert_short16_sat_rtp(long16);\n"
30836"short16 __ovld __cnfn convert_short16_rtn(long16);\n"
30837"short16 __ovld __cnfn convert_short16_sat_rtn(long16);\n"
30838"short16 __ovld __cnfn convert_short16(long16);\n"
30839"short16 __ovld __cnfn convert_short16_sat(long16);\n"
30840"short16 __ovld __cnfn convert_short16_rte(ulong16);\n"
30841"short16 __ovld __cnfn convert_short16_sat_rte(ulong16);\n"
30842"short16 __ovld __cnfn convert_short16_rtz(ulong16);\n"
30843"short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);\n"
30844"short16 __ovld __cnfn convert_short16_rtp(ulong16);\n"
30845"short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);\n"
30846"short16 __ovld __cnfn convert_short16_rtn(ulong16);\n"
30847"short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);\n"
30848"short16 __ovld __cnfn convert_short16(ulong16);\n"
30849"short16 __ovld __cnfn convert_short16_sat(ulong16);\n"
30850"short16 __ovld __cnfn convert_short16_rte(float16);\n"
30851"short16 __ovld __cnfn convert_short16_sat_rte(float16);\n"
30852"short16 __ovld __cnfn convert_short16_rtz(float16);\n"
30853"short16 __ovld __cnfn convert_short16_sat_rtz(float16);\n"
30854"short16 __ovld __cnfn convert_short16_rtp(float16);\n"
30855"short16 __ovld __cnfn convert_short16_sat_rtp(float16);\n"
30856"short16 __ovld __cnfn convert_short16_rtn(float16);\n"
30857"short16 __ovld __cnfn convert_short16_sat_rtn(float16);\n"
30858"short16 __ovld __cnfn convert_short16(float16);\n"
30859"short16 __ovld __cnfn convert_short16_sat(float16);\n"
30860"ushort16 __ovld __cnfn convert_ushort16_rte(char16);\n"
30861"ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);\n"
30862"ushort16 __ovld __cnfn convert_ushort16_rtz(char16);\n"
30863"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);\n"
30864"ushort16 __ovld __cnfn convert_ushort16_rtp(char16);\n"
30865"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);\n"
30866"ushort16 __ovld __cnfn convert_ushort16_rtn(char16);\n"
30867"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);\n"
30868"ushort16 __ovld __cnfn convert_ushort16(char16);\n"
30869"ushort16 __ovld __cnfn convert_ushort16_sat(char16);\n"
30870"ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);\n"
30871"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);\n"
30872"ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);\n"
30873"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);\n"
30874"ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);\n"
30875"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);\n"
30876"ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);\n"
30877"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);\n"
30878"ushort16 __ovld __cnfn convert_ushort16(uchar16);\n"
30879"ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);\n"
30880"ushort16 __ovld __cnfn convert_ushort16_rte(short16);\n"
30881"ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);\n"
30882"ushort16 __ovld __cnfn convert_ushort16_rtz(short16);\n"
30883"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);\n"
30884"ushort16 __ovld __cnfn convert_ushort16_rtp(short16);\n"
30885"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);\n"
30886"ushort16 __ovld __cnfn convert_ushort16_rtn(short16);\n"
30887"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);\n"
30888"ushort16 __ovld __cnfn convert_ushort16(short16);\n"
30889"ushort16 __ovld __cnfn convert_ushort16_sat(short16);\n"
30890"ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);\n"
30891"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);\n"
30892"ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);\n"
30893"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);\n"
30894"ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);\n"
30895"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);\n"
30896"ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);\n"
30897"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);\n"
30898"ushort16 __ovld __cnfn convert_ushort16(ushort16);\n"
30899"ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);\n"
30900"ushort16 __ovld __cnfn convert_ushort16_rte(int16);\n"
30901"ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);\n"
30902"ushort16 __ovld __cnfn convert_ushort16_rtz(int16);\n"
30903"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);\n"
30904"ushort16 __ovld __cnfn convert_ushort16_rtp(int16);\n"
30905"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);\n"
30906"ushort16 __ovld __cnfn convert_ushort16_rtn(int16);\n"
30907"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);\n"
30908"ushort16 __ovld __cnfn convert_ushort16(int16);\n"
30909"ushort16 __ovld __cnfn convert_ushort16_sat(int16);\n"
30910"ushort16 __ovld __cnfn convert_ushort16_rte(uint16);\n"
30911"ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);\n"
30912"ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);\n"
30913"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);\n"
30914"ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);\n"
30915"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);\n"
30916"ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);\n"
30917"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);\n"
30918"ushort16 __ovld __cnfn convert_ushort16(uint16);\n"
30919"ushort16 __ovld __cnfn convert_ushort16_sat(uint16);\n"
30920"ushort16 __ovld __cnfn convert_ushort16_rte(long16);\n"
30921"ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);\n"
30922"ushort16 __ovld __cnfn convert_ushort16_rtz(long16);\n"
30923"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);\n"
30924"ushort16 __ovld __cnfn convert_ushort16_rtp(long16);\n"
30925"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);\n"
30926"ushort16 __ovld __cnfn convert_ushort16_rtn(long16);\n"
30927"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);\n"
30928"ushort16 __ovld __cnfn convert_ushort16(long16);\n"
30929"ushort16 __ovld __cnfn convert_ushort16_sat(long16);\n"
30930"ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);\n"
30931"ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);\n"
30932"ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);\n"
30933"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);\n"
30934"ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);\n"
30935"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);\n"
30936"ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);\n"
30937"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);\n"
30938"ushort16 __ovld __cnfn convert_ushort16(ulong16);\n"
30939"ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);\n"
30940"ushort16 __ovld __cnfn convert_ushort16_rte(float16);\n"
30941"ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);\n"
30942"ushort16 __ovld __cnfn convert_ushort16_rtz(float16);\n"
30943"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);\n"
30944"ushort16 __ovld __cnfn convert_ushort16_rtp(float16);\n"
30945"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);\n"
30946"ushort16 __ovld __cnfn convert_ushort16_rtn(float16);\n"
30947"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);\n"
30948"ushort16 __ovld __cnfn convert_ushort16(float16);\n"
30949"ushort16 __ovld __cnfn convert_ushort16_sat(float16);\n"
30950"int16 __ovld __cnfn convert_int16_rte(char16);\n"
30951"int16 __ovld __cnfn convert_int16_sat_rte(char16);\n"
30952"int16 __ovld __cnfn convert_int16_rtz(char16);\n"
30953"int16 __ovld __cnfn convert_int16_sat_rtz(char16);\n"
30954"int16 __ovld __cnfn convert_int16_rtp(char16);\n"
30955"int16 __ovld __cnfn convert_int16_sat_rtp(char16);\n"
30956"int16 __ovld __cnfn convert_int16_rtn(char16);\n"
30957"int16 __ovld __cnfn convert_int16_sat_rtn(char16);\n"
30958"int16 __ovld __cnfn convert_int16(char16);\n"
30959"int16 __ovld __cnfn convert_int16_sat(char16);\n"
30960"int16 __ovld __cnfn convert_int16_rte(uchar16);\n"
30961"int16 __ovld __cnfn convert_int16_sat_rte(uchar16);\n"
30962"int16 __ovld __cnfn convert_int16_rtz(uchar16);\n"
30963"int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);\n"
30964"int16 __ovld __cnfn convert_int16_rtp(uchar16);\n"
30965"int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);\n"
30966"int16 __ovld __cnfn convert_int16_rtn(uchar16);\n"
30967"int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);\n"
30968"int16 __ovld __cnfn convert_int16(uchar16);\n"
30969"int16 __ovld __cnfn convert_int16_sat(uchar16);\n"
30970"int16 __ovld __cnfn convert_int16_rte(short16);\n"
30971"int16 __ovld __cnfn convert_int16_sat_rte(short16);\n"
30972"int16 __ovld __cnfn convert_int16_rtz(short16);\n"
30973"int16 __ovld __cnfn convert_int16_sat_rtz(short16);\n"
30974"int16 __ovld __cnfn convert_int16_rtp(short16);\n"
30975"int16 __ovld __cnfn convert_int16_sat_rtp(short16);\n"
30976"int16 __ovld __cnfn convert_int16_rtn(short16);\n"
30977"int16 __ovld __cnfn convert_int16_sat_rtn(short16);\n"
30978"int16 __ovld __cnfn convert_int16(short16);\n"
30979"int16 __ovld __cnfn convert_int16_sat(short16);\n"
30980"int16 __ovld __cnfn convert_int16_rte(ushort16);\n"
30981"int16 __ovld __cnfn convert_int16_sat_rte(ushort16);\n"
30982"int16 __ovld __cnfn convert_int16_rtz(ushort16);\n"
30983"int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);\n"
30984"int16 __ovld __cnfn convert_int16_rtp(ushort16);\n"
30985"int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);\n"
30986"int16 __ovld __cnfn convert_int16_rtn(ushort16);\n"
30987"int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);\n"
30988"int16 __ovld __cnfn convert_int16(ushort16);\n"
30989"int16 __ovld __cnfn convert_int16_sat(ushort16);\n"
30990"int16 __ovld __cnfn convert_int16_rte(int16);\n"
30991"int16 __ovld __cnfn convert_int16_sat_rte(int16);\n"
30992"int16 __ovld __cnfn convert_int16_rtz(int16);\n"
30993"int16 __ovld __cnfn convert_int16_sat_rtz(int16);\n"
30994"int16 __ovld __cnfn convert_int16_rtp(int16);\n"
30995"int16 __ovld __cnfn convert_int16_sat_rtp(int16);\n"
30996"int16 __ovld __cnfn convert_int16_rtn(int16);\n"
30997"int16 __ovld __cnfn convert_int16_sat_rtn(int16);\n"
30998"int16 __ovld __cnfn convert_int16(int16);\n"
30999"int16 __ovld __cnfn convert_int16_sat(int16);\n"
31000"int16 __ovld __cnfn convert_int16_rte(uint16);\n"
31001"int16 __ovld __cnfn convert_int16_sat_rte(uint16);\n"
31002"int16 __ovld __cnfn convert_int16_rtz(uint16);\n"
31003"int16 __ovld __cnfn convert_int16_sat_rtz(uint16);\n"
31004"int16 __ovld __cnfn convert_int16_rtp(uint16);\n"
31005"int16 __ovld __cnfn convert_int16_sat_rtp(uint16);\n"
31006"int16 __ovld __cnfn convert_int16_rtn(uint16);\n"
31007"int16 __ovld __cnfn convert_int16_sat_rtn(uint16);\n"
31008"int16 __ovld __cnfn convert_int16(uint16);\n"
31009"int16 __ovld __cnfn convert_int16_sat(uint16);\n"
31010"int16 __ovld __cnfn convert_int16_rte(long16);\n"
31011"int16 __ovld __cnfn convert_int16_sat_rte(long16);\n"
31012"int16 __ovld __cnfn convert_int16_rtz(long16);\n"
31013"int16 __ovld __cnfn convert_int16_sat_rtz(long16);\n"
31014"int16 __ovld __cnfn convert_int16_rtp(long16);\n"
31015"int16 __ovld __cnfn convert_int16_sat_rtp(long16);\n"
31016"int16 __ovld __cnfn convert_int16_rtn(long16);\n"
31017"int16 __ovld __cnfn convert_int16_sat_rtn(long16);\n"
31018"int16 __ovld __cnfn convert_int16(long16);\n"
31019"int16 __ovld __cnfn convert_int16_sat(long16);\n"
31020"int16 __ovld __cnfn convert_int16_rte(ulong16);\n"
31021"int16 __ovld __cnfn convert_int16_sat_rte(ulong16);\n"
31022"int16 __ovld __cnfn convert_int16_rtz(ulong16);\n"
31023"int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);\n"
31024"int16 __ovld __cnfn convert_int16_rtp(ulong16);\n"
31025"int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);\n"
31026"int16 __ovld __cnfn convert_int16_rtn(ulong16);\n"
31027"int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);\n"
31028"int16 __ovld __cnfn convert_int16(ulong16);\n"
31029"int16 __ovld __cnfn convert_int16_sat(ulong16);\n"
31030"int16 __ovld __cnfn convert_int16_rte(float16);\n"
31031"int16 __ovld __cnfn convert_int16_sat_rte(float16);\n"
31032"int16 __ovld __cnfn convert_int16_rtz(float16);\n"
31033"int16 __ovld __cnfn convert_int16_sat_rtz(float16);\n"
31034"int16 __ovld __cnfn convert_int16_rtp(float16);\n"
31035"int16 __ovld __cnfn convert_int16_sat_rtp(float16);\n"
31036"int16 __ovld __cnfn convert_int16_rtn(float16);\n"
31037"int16 __ovld __cnfn convert_int16_sat_rtn(float16);\n"
31038"int16 __ovld __cnfn convert_int16(float16);\n"
31039"int16 __ovld __cnfn convert_int16_sat(float16);\n"
31040"uint16 __ovld __cnfn convert_uint16_rte(char16);\n"
31041"uint16 __ovld __cnfn convert_uint16_sat_rte(char16);\n"
31042"uint16 __ovld __cnfn convert_uint16_rtz(char16);\n"
31043"uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);\n"
31044"uint16 __ovld __cnfn convert_uint16_rtp(char16);\n"
31045"uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);\n"
31046"uint16 __ovld __cnfn convert_uint16_rtn(char16);\n"
31047"uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);\n"
31048"uint16 __ovld __cnfn convert_uint16(char16);\n"
31049"uint16 __ovld __cnfn convert_uint16_sat(char16);\n"
31050"uint16 __ovld __cnfn convert_uint16_rte(uchar16);\n"
31051"uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);\n"
31052"uint16 __ovld __cnfn convert_uint16_rtz(uchar16);\n"
31053"uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);\n"
31054"uint16 __ovld __cnfn convert_uint16_rtp(uchar16);\n"
31055"uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);\n"
31056"uint16 __ovld __cnfn convert_uint16_rtn(uchar16);\n"
31057"uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);\n"
31058"uint16 __ovld __cnfn convert_uint16(uchar16);\n"
31059"uint16 __ovld __cnfn convert_uint16_sat(uchar16);\n"
31060"uint16 __ovld __cnfn convert_uint16_rte(short16);\n"
31061"uint16 __ovld __cnfn convert_uint16_sat_rte(short16);\n"
31062"uint16 __ovld __cnfn convert_uint16_rtz(short16);\n"
31063"uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);\n"
31064"uint16 __ovld __cnfn convert_uint16_rtp(short16);\n"
31065"uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);\n"
31066"uint16 __ovld __cnfn convert_uint16_rtn(short16);\n"
31067"uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);\n"
31068"uint16 __ovld __cnfn convert_uint16(short16);\n"
31069"uint16 __ovld __cnfn convert_uint16_sat(short16);\n"
31070"uint16 __ovld __cnfn convert_uint16_rte(ushort16);\n"
31071"uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);\n"
31072"uint16 __ovld __cnfn convert_uint16_rtz(ushort16);\n"
31073"uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);\n"
31074"uint16 __ovld __cnfn convert_uint16_rtp(ushort16);\n"
31075"uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);\n"
31076"uint16 __ovld __cnfn convert_uint16_rtn(ushort16);\n"
31077"uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);\n"
31078"uint16 __ovld __cnfn convert_uint16(ushort16);\n"
31079"uint16 __ovld __cnfn convert_uint16_sat(ushort16);\n"
31080"uint16 __ovld __cnfn convert_uint16_rte(int16);\n"
31081"uint16 __ovld __cnfn convert_uint16_sat_rte(int16);\n"
31082"uint16 __ovld __cnfn convert_uint16_rtz(int16);\n"
31083"uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);\n"
31084"uint16 __ovld __cnfn convert_uint16_rtp(int16);\n"
31085"uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);\n"
31086"uint16 __ovld __cnfn convert_uint16_rtn(int16);\n"
31087"uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);\n"
31088"uint16 __ovld __cnfn convert_uint16(int16);\n"
31089"uint16 __ovld __cnfn convert_uint16_sat(int16);\n"
31090"uint16 __ovld __cnfn convert_uint16_rte(uint16);\n"
31091"uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);\n"
31092"uint16 __ovld __cnfn convert_uint16_rtz(uint16);\n"
31093"uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);\n"
31094"uint16 __ovld __cnfn convert_uint16_rtp(uint16);\n"
31095"uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);\n"
31096"uint16 __ovld __cnfn convert_uint16_rtn(uint16);\n"
31097"uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);\n"
31098"uint16 __ovld __cnfn convert_uint16(uint16);\n"
31099"uint16 __ovld __cnfn convert_uint16_sat(uint16);\n"
31100"uint16 __ovld __cnfn convert_uint16_rte(long16);\n"
31101"uint16 __ovld __cnfn convert_uint16_sat_rte(long16);\n"
31102"uint16 __ovld __cnfn convert_uint16_rtz(long16);\n"
31103"uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);\n"
31104"uint16 __ovld __cnfn convert_uint16_rtp(long16);\n"
31105"uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);\n"
31106"uint16 __ovld __cnfn convert_uint16_rtn(long16);\n"
31107"uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);\n"
31108"uint16 __ovld __cnfn convert_uint16(long16);\n"
31109"uint16 __ovld __cnfn convert_uint16_sat(long16);\n"
31110"uint16 __ovld __cnfn convert_uint16_rte(ulong16);\n"
31111"uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);\n"
31112"uint16 __ovld __cnfn convert_uint16_rtz(ulong16);\n"
31113"uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);\n"
31114"uint16 __ovld __cnfn convert_uint16_rtp(ulong16);\n"
31115"uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);\n"
31116"uint16 __ovld __cnfn convert_uint16_rtn(ulong16);\n"
31117"uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);\n"
31118"uint16 __ovld __cnfn convert_uint16(ulong16);\n"
31119"uint16 __ovld __cnfn convert_uint16_sat(ulong16);\n"
31120"uint16 __ovld __cnfn convert_uint16_rte(float16);\n"
31121"uint16 __ovld __cnfn convert_uint16_sat_rte(float16);\n"
31122"uint16 __ovld __cnfn convert_uint16_rtz(float16);\n"
31123"uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);\n"
31124"uint16 __ovld __cnfn convert_uint16_rtp(float16);\n"
31125"uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);\n"
31126"uint16 __ovld __cnfn convert_uint16_rtn(float16);\n"
31127"uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);\n"
31128"uint16 __ovld __cnfn convert_uint16(float16);\n"
31129"uint16 __ovld __cnfn convert_uint16_sat(float16);\n"
31130"long16 __ovld __cnfn convert_long16_rte(char16);\n"
31131"long16 __ovld __cnfn convert_long16_sat_rte(char16);\n"
31132"long16 __ovld __cnfn convert_long16_rtz(char16);\n"
31133"long16 __ovld __cnfn convert_long16_sat_rtz(char16);\n"
31134"long16 __ovld __cnfn convert_long16_rtp(char16);\n"
31135"long16 __ovld __cnfn convert_long16_sat_rtp(char16);\n"
31136"long16 __ovld __cnfn convert_long16_rtn(char16);\n"
31137"long16 __ovld __cnfn convert_long16_sat_rtn(char16);\n"
31138"long16 __ovld __cnfn convert_long16(char16);\n"
31139"long16 __ovld __cnfn convert_long16_sat(char16);\n"
31140"long16 __ovld __cnfn convert_long16_rte(uchar16);\n"
31141"long16 __ovld __cnfn convert_long16_sat_rte(uchar16);\n"
31142"long16 __ovld __cnfn convert_long16_rtz(uchar16);\n"
31143"long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);\n"
31144"long16 __ovld __cnfn convert_long16_rtp(uchar16);\n"
31145"long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);\n"
31146"long16 __ovld __cnfn convert_long16_rtn(uchar16);\n"
31147"long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);\n"
31148"long16 __ovld __cnfn convert_long16(uchar16);\n"
31149"long16 __ovld __cnfn convert_long16_sat(uchar16);\n"
31150"long16 __ovld __cnfn convert_long16_rte(short16);\n"
31151"long16 __ovld __cnfn convert_long16_sat_rte(short16);\n"
31152"long16 __ovld __cnfn convert_long16_rtz(short16);\n"
31153"long16 __ovld __cnfn convert_long16_sat_rtz(short16);\n"
31154"long16 __ovld __cnfn convert_long16_rtp(short16);\n"
31155"long16 __ovld __cnfn convert_long16_sat_rtp(short16);\n"
31156"long16 __ovld __cnfn convert_long16_rtn(short16);\n"
31157"long16 __ovld __cnfn convert_long16_sat_rtn(short16);\n"
31158"long16 __ovld __cnfn convert_long16(short16);\n"
31159"long16 __ovld __cnfn convert_long16_sat(short16);\n"
31160"long16 __ovld __cnfn convert_long16_rte(ushort16);\n"
31161"long16 __ovld __cnfn convert_long16_sat_rte(ushort16);\n"
31162"long16 __ovld __cnfn convert_long16_rtz(ushort16);\n"
31163"long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);\n"
31164"long16 __ovld __cnfn convert_long16_rtp(ushort16);\n"
31165"long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);\n"
31166"long16 __ovld __cnfn convert_long16_rtn(ushort16);\n"
31167"long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);\n"
31168"long16 __ovld __cnfn convert_long16(ushort16);\n"
31169"long16 __ovld __cnfn convert_long16_sat(ushort16);\n"
31170"long16 __ovld __cnfn convert_long16_rte(int16);\n"
31171"long16 __ovld __cnfn convert_long16_sat_rte(int16);\n"
31172"long16 __ovld __cnfn convert_long16_rtz(int16);\n"
31173"long16 __ovld __cnfn convert_long16_sat_rtz(int16);\n"
31174"long16 __ovld __cnfn convert_long16_rtp(int16);\n"
31175"long16 __ovld __cnfn convert_long16_sat_rtp(int16);\n"
31176"long16 __ovld __cnfn convert_long16_rtn(int16);\n"
31177"long16 __ovld __cnfn convert_long16_sat_rtn(int16);\n"
31178"long16 __ovld __cnfn convert_long16(int16);\n"
31179"long16 __ovld __cnfn convert_long16_sat(int16);\n"
31180"long16 __ovld __cnfn convert_long16_rte(uint16);\n"
31181"long16 __ovld __cnfn convert_long16_sat_rte(uint16);\n"
31182"long16 __ovld __cnfn convert_long16_rtz(uint16);\n"
31183"long16 __ovld __cnfn convert_long16_sat_rtz(uint16);\n"
31184"long16 __ovld __cnfn convert_long16_rtp(uint16);\n"
31185"long16 __ovld __cnfn convert_long16_sat_rtp(uint16);\n"
31186"long16 __ovld __cnfn convert_long16_rtn(uint16);\n"
31187"long16 __ovld __cnfn convert_long16_sat_rtn(uint16);\n"
31188"long16 __ovld __cnfn convert_long16(uint16);\n"
31189"long16 __ovld __cnfn convert_long16_sat(uint16);\n"
31190"long16 __ovld __cnfn convert_long16_rte(long16);\n"
31191"long16 __ovld __cnfn convert_long16_sat_rte(long16);\n"
31192"long16 __ovld __cnfn convert_long16_rtz(long16);\n"
31193"long16 __ovld __cnfn convert_long16_sat_rtz(long16);\n"
31194"long16 __ovld __cnfn convert_long16_rtp(long16);\n"
31195"long16 __ovld __cnfn convert_long16_sat_rtp(long16);\n"
31196"long16 __ovld __cnfn convert_long16_rtn(long16);\n"
31197"long16 __ovld __cnfn convert_long16_sat_rtn(long16);\n"
31198"long16 __ovld __cnfn convert_long16(long16);\n"
31199"long16 __ovld __cnfn convert_long16_sat(long16);\n"
31200"long16 __ovld __cnfn convert_long16_rte(ulong16);\n"
31201"long16 __ovld __cnfn convert_long16_sat_rte(ulong16);\n"
31202"long16 __ovld __cnfn convert_long16_rtz(ulong16);\n"
31203"long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);\n"
31204"long16 __ovld __cnfn convert_long16_rtp(ulong16);\n"
31205"long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);\n"
31206"long16 __ovld __cnfn convert_long16_rtn(ulong16);\n"
31207"long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);\n"
31208"long16 __ovld __cnfn convert_long16(ulong16);\n"
31209"long16 __ovld __cnfn convert_long16_sat(ulong16);\n"
31210"long16 __ovld __cnfn convert_long16_rte(float16);\n"
31211"long16 __ovld __cnfn convert_long16_sat_rte(float16);\n"
31212"long16 __ovld __cnfn convert_long16_rtz(float16);\n"
31213"long16 __ovld __cnfn convert_long16_sat_rtz(float16);\n"
31214"long16 __ovld __cnfn convert_long16_rtp(float16);\n"
31215"long16 __ovld __cnfn convert_long16_sat_rtp(float16);\n"
31216"long16 __ovld __cnfn convert_long16_rtn(float16);\n"
31217"long16 __ovld __cnfn convert_long16_sat_rtn(float16);\n"
31218"long16 __ovld __cnfn convert_long16(float16);\n"
31219"long16 __ovld __cnfn convert_long16_sat(float16);\n"
31220"ulong16 __ovld __cnfn convert_ulong16_rte(char16);\n"
31221"ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);\n"
31222"ulong16 __ovld __cnfn convert_ulong16_rtz(char16);\n"
31223"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);\n"
31224"ulong16 __ovld __cnfn convert_ulong16_rtp(char16);\n"
31225"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);\n"
31226"ulong16 __ovld __cnfn convert_ulong16_rtn(char16);\n"
31227"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);\n"
31228"ulong16 __ovld __cnfn convert_ulong16(char16);\n"
31229"ulong16 __ovld __cnfn convert_ulong16_sat(char16);\n"
31230"ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);\n"
31231"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);\n"
31232"ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);\n"
31233"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);\n"
31234"ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);\n"
31235"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);\n"
31236"ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);\n"
31237"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);\n"
31238"ulong16 __ovld __cnfn convert_ulong16(uchar16);\n"
31239"ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);\n"
31240"ulong16 __ovld __cnfn convert_ulong16_rte(short16);\n"
31241"ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);\n"
31242"ulong16 __ovld __cnfn convert_ulong16_rtz(short16);\n"
31243"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);\n"
31244"ulong16 __ovld __cnfn convert_ulong16_rtp(short16);\n"
31245"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);\n"
31246"ulong16 __ovld __cnfn convert_ulong16_rtn(short16);\n"
31247"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);\n"
31248"ulong16 __ovld __cnfn convert_ulong16(short16);\n"
31249"ulong16 __ovld __cnfn convert_ulong16_sat(short16);\n"
31250"ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);\n"
31251"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);\n"
31252"ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);\n"
31253"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);\n"
31254"ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);\n"
31255"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);\n"
31256"ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);\n"
31257"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);\n"
31258"ulong16 __ovld __cnfn convert_ulong16(ushort16);\n"
31259"ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);\n"
31260"ulong16 __ovld __cnfn convert_ulong16_rte(int16);\n"
31261"ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);\n"
31262"ulong16 __ovld __cnfn convert_ulong16_rtz(int16);\n"
31263"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);\n"
31264"ulong16 __ovld __cnfn convert_ulong16_rtp(int16);\n"
31265"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);\n"
31266"ulong16 __ovld __cnfn convert_ulong16_rtn(int16);\n"
31267"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);\n"
31268"ulong16 __ovld __cnfn convert_ulong16(int16);\n"
31269"ulong16 __ovld __cnfn convert_ulong16_sat(int16);\n"
31270"ulong16 __ovld __cnfn convert_ulong16_rte(uint16);\n"
31271"ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);\n"
31272"ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);\n"
31273"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);\n"
31274"ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);\n"
31275"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);\n"
31276"ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);\n"
31277"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);\n"
31278"ulong16 __ovld __cnfn convert_ulong16(uint16);\n"
31279"ulong16 __ovld __cnfn convert_ulong16_sat(uint16);\n"
31280"ulong16 __ovld __cnfn convert_ulong16_rte(long16);\n"
31281"ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);\n"
31282"ulong16 __ovld __cnfn convert_ulong16_rtz(long16);\n"
31283"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);\n"
31284"ulong16 __ovld __cnfn convert_ulong16_rtp(long16);\n"
31285"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);\n"
31286"ulong16 __ovld __cnfn convert_ulong16_rtn(long16);\n"
31287"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);\n"
31288"ulong16 __ovld __cnfn convert_ulong16(long16);\n"
31289"ulong16 __ovld __cnfn convert_ulong16_sat(long16);\n"
31290"ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);\n"
31291"ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);\n"
31292"ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);\n"
31293"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);\n"
31294"ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);\n"
31295"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);\n"
31296"ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);\n"
31297"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);\n"
31298"ulong16 __ovld __cnfn convert_ulong16(ulong16);\n"
31299"ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);\n"
31300"ulong16 __ovld __cnfn convert_ulong16_rte(float16);\n"
31301"ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);\n"
31302"ulong16 __ovld __cnfn convert_ulong16_rtz(float16);\n"
31303"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);\n"
31304"ulong16 __ovld __cnfn convert_ulong16_rtp(float16);\n"
31305"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);\n"
31306"ulong16 __ovld __cnfn convert_ulong16_rtn(float16);\n"
31307"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);\n"
31308"ulong16 __ovld __cnfn convert_ulong16(float16);\n"
31309"ulong16 __ovld __cnfn convert_ulong16_sat(float16);\n"
31310"float16 __ovld __cnfn convert_float16_rte(char16);\n"
31311"float16 __ovld __cnfn convert_float16_rtz(char16);\n"
31312"float16 __ovld __cnfn convert_float16_rtp(char16);\n"
31313"float16 __ovld __cnfn convert_float16_rtn(char16);\n"
31314"float16 __ovld __cnfn convert_float16(char16);\n"
31315"float16 __ovld __cnfn convert_float16_rte(uchar16);\n"
31316"float16 __ovld __cnfn convert_float16_rtz(uchar16);\n"
31317"float16 __ovld __cnfn convert_float16_rtp(uchar16);\n"
31318"float16 __ovld __cnfn convert_float16_rtn(uchar16);\n"
31319"float16 __ovld __cnfn convert_float16(uchar16);\n"
31320"float16 __ovld __cnfn convert_float16_rte(short16);\n"
31321"float16 __ovld __cnfn convert_float16_rtz(short16);\n"
31322"float16 __ovld __cnfn convert_float16_rtp(short16);\n"
31323"float16 __ovld __cnfn convert_float16_rtn(short16);\n"
31324"float16 __ovld __cnfn convert_float16(short16);\n"
31325"float16 __ovld __cnfn convert_float16_rte(ushort16);\n"
31326"float16 __ovld __cnfn convert_float16_rtz(ushort16);\n"
31327"float16 __ovld __cnfn convert_float16_rtp(ushort16);\n"
31328"float16 __ovld __cnfn convert_float16_rtn(ushort16);\n"
31329"float16 __ovld __cnfn convert_float16(ushort16);\n"
31330"float16 __ovld __cnfn convert_float16_rte(int16);\n"
31331"float16 __ovld __cnfn convert_float16_rtz(int16);\n"
31332"float16 __ovld __cnfn convert_float16_rtp(int16);\n"
31333"float16 __ovld __cnfn convert_float16_rtn(int16);\n"
31334"float16 __ovld __cnfn convert_float16(int16);\n"
31335"float16 __ovld __cnfn convert_float16_rte(uint16);\n"
31336"float16 __ovld __cnfn convert_float16_rtz(uint16);\n"
31337"float16 __ovld __cnfn convert_float16_rtp(uint16);\n"
31338"float16 __ovld __cnfn convert_float16_rtn(uint16);\n"
31339"float16 __ovld __cnfn convert_float16(uint16);\n"
31340"float16 __ovld __cnfn convert_float16_rte(long16);\n"
31341"float16 __ovld __cnfn convert_float16_rtz(long16);\n"
31342"float16 __ovld __cnfn convert_float16_rtp(long16);\n"
31343"float16 __ovld __cnfn convert_float16_rtn(long16);\n"
31344"float16 __ovld __cnfn convert_float16(long16);\n"
31345"float16 __ovld __cnfn convert_float16_rte(ulong16);\n"
31346"float16 __ovld __cnfn convert_float16_rtz(ulong16);\n"
31347"float16 __ovld __cnfn convert_float16_rtp(ulong16);\n"
31348"float16 __ovld __cnfn convert_float16_rtn(ulong16);\n"
31349"float16 __ovld __cnfn convert_float16(ulong16);\n"
31350"float16 __ovld __cnfn convert_float16_rte(float16);\n"
31351"float16 __ovld __cnfn convert_float16_rtz(float16);\n"
31352"float16 __ovld __cnfn convert_float16_rtp(float16);\n"
31353"float16 __ovld __cnfn convert_float16_rtn(float16);\n"
31354"float16 __ovld __cnfn convert_float16(float16);\n"
31355"\n"
31356"// Conversions with double data type parameters or return value.\n"
31357"\n"
31358"#ifdef cl_khr_fp64\n"
31359"char __ovld __cnfn convert_char(double);\n"
31360"char __ovld __cnfn convert_char_rte(double);\n"
31361"char __ovld __cnfn convert_char_rtn(double);\n"
31362"char __ovld __cnfn convert_char_rtp(double);\n"
31363"char __ovld __cnfn convert_char_rtz(double);\n"
31364"char __ovld __cnfn convert_char_sat(double);\n"
31365"char __ovld __cnfn convert_char_sat_rte(double);\n"
31366"char __ovld __cnfn convert_char_sat_rtn(double);\n"
31367"char __ovld __cnfn convert_char_sat_rtp(double);\n"
31368"char __ovld __cnfn convert_char_sat_rtz(double);\n"
31369"char2 __ovld __cnfn convert_char2(double2);\n"
31370"char2 __ovld __cnfn convert_char2_rte(double2);\n"
31371"char2 __ovld __cnfn convert_char2_rtn(double2);\n"
31372"char2 __ovld __cnfn convert_char2_rtp(double2);\n"
31373"char2 __ovld __cnfn convert_char2_rtz(double2);\n"
31374"char2 __ovld __cnfn convert_char2_sat(double2);\n"
31375"char2 __ovld __cnfn convert_char2_sat_rte(double2);\n"
31376"char2 __ovld __cnfn convert_char2_sat_rtn(double2);\n"
31377"char2 __ovld __cnfn convert_char2_sat_rtp(double2);\n"
31378"char2 __ovld __cnfn convert_char2_sat_rtz(double2);\n"
31379"char3 __ovld __cnfn convert_char3(double3);\n"
31380"char3 __ovld __cnfn convert_char3_rte(double3);\n"
31381"char3 __ovld __cnfn convert_char3_rtn(double3);\n"
31382"char3 __ovld __cnfn convert_char3_rtp(double3);\n"
31383"char3 __ovld __cnfn convert_char3_rtz(double3);\n"
31384"char3 __ovld __cnfn convert_char3_sat(double3);\n"
31385"char3 __ovld __cnfn convert_char3_sat_rte(double3);\n"
31386"char3 __ovld __cnfn convert_char3_sat_rtn(double3);\n"
31387"char3 __ovld __cnfn convert_char3_sat_rtp(double3);\n"
31388"char3 __ovld __cnfn convert_char3_sat_rtz(double3);\n"
31389"char4 __ovld __cnfn convert_char4(double4);\n"
31390"char4 __ovld __cnfn convert_char4_rte(double4);\n"
31391"char4 __ovld __cnfn convert_char4_rtn(double4);\n"
31392"char4 __ovld __cnfn convert_char4_rtp(double4);\n"
31393"char4 __ovld __cnfn convert_char4_rtz(double4);\n"
31394"char4 __ovld __cnfn convert_char4_sat(double4);\n"
31395"char4 __ovld __cnfn convert_char4_sat_rte(double4);\n"
31396"char4 __ovld __cnfn convert_char4_sat_rtn(double4);\n"
31397"char4 __ovld __cnfn convert_char4_sat_rtp(double4);\n"
31398"char4 __ovld __cnfn convert_char4_sat_rtz(double4);\n"
31399"char8 __ovld __cnfn convert_char8(double8);\n"
31400"char8 __ovld __cnfn convert_char8_rte(double8);\n"
31401"char8 __ovld __cnfn convert_char8_rtn(double8);\n"
31402"char8 __ovld __cnfn convert_char8_rtp(double8);\n"
31403"char8 __ovld __cnfn convert_char8_rtz(double8);\n"
31404"char8 __ovld __cnfn convert_char8_sat(double8);\n"
31405"char8 __ovld __cnfn convert_char8_sat_rte(double8);\n"
31406"char8 __ovld __cnfn convert_char8_sat_rtn(double8);\n"
31407"char8 __ovld __cnfn convert_char8_sat_rtp(double8);\n"
31408"char8 __ovld __cnfn convert_char8_sat_rtz(double8);\n"
31409"char16 __ovld __cnfn convert_char16(double16);\n"
31410"char16 __ovld __cnfn convert_char16_rte(double16);\n"
31411"char16 __ovld __cnfn convert_char16_rtn(double16);\n"
31412"char16 __ovld __cnfn convert_char16_rtp(double16);\n"
31413"char16 __ovld __cnfn convert_char16_rtz(double16);\n"
31414"char16 __ovld __cnfn convert_char16_sat(double16);\n"
31415"char16 __ovld __cnfn convert_char16_sat_rte(double16);\n"
31416"char16 __ovld __cnfn convert_char16_sat_rtn(double16);\n"
31417"char16 __ovld __cnfn convert_char16_sat_rtp(double16);\n"
31418"char16 __ovld __cnfn convert_char16_sat_rtz(double16);\n"
31419"\n"
31420"uchar __ovld __cnfn convert_uchar(double);\n"
31421"uchar __ovld __cnfn convert_uchar_rte(double);\n"
31422"uchar __ovld __cnfn convert_uchar_rtn(double);\n"
31423"uchar __ovld __cnfn convert_uchar_rtp(double);\n"
31424"uchar __ovld __cnfn convert_uchar_rtz(double);\n"
31425"uchar __ovld __cnfn convert_uchar_sat(double);\n"
31426"uchar __ovld __cnfn convert_uchar_sat_rte(double);\n"
31427"uchar __ovld __cnfn convert_uchar_sat_rtn(double);\n"
31428"uchar __ovld __cnfn convert_uchar_sat_rtp(double);\n"
31429"uchar __ovld __cnfn convert_uchar_sat_rtz(double);\n"
31430"uchar2 __ovld __cnfn convert_uchar2(double2);\n"
31431"uchar2 __ovld __cnfn convert_uchar2_rte(double2);\n"
31432"uchar2 __ovld __cnfn convert_uchar2_rtn(double2);\n"
31433"uchar2 __ovld __cnfn convert_uchar2_rtp(double2);\n"
31434"uchar2 __ovld __cnfn convert_uchar2_rtz(double2);\n"
31435"uchar2 __ovld __cnfn convert_uchar2_sat(double2);\n"
31436"uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);\n"
31437"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);\n"
31438"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);\n"
31439"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);\n"
31440"uchar3 __ovld __cnfn convert_uchar3(double3);\n"
31441"uchar3 __ovld __cnfn convert_uchar3_rte(double3);\n"
31442"uchar3 __ovld __cnfn convert_uchar3_rtn(double3);\n"
31443"uchar3 __ovld __cnfn convert_uchar3_rtp(double3);\n"
31444"uchar3 __ovld __cnfn convert_uchar3_rtz(double3);\n"
31445"uchar3 __ovld __cnfn convert_uchar3_sat(double3);\n"
31446"uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);\n"
31447"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);\n"
31448"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);\n"
31449"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);\n"
31450"uchar4 __ovld __cnfn convert_uchar4(double4);\n"
31451"uchar4 __ovld __cnfn convert_uchar4_rte(double4);\n"
31452"uchar4 __ovld __cnfn convert_uchar4_rtn(double4);\n"
31453"uchar4 __ovld __cnfn convert_uchar4_rtp(double4);\n"
31454"uchar4 __ovld __cnfn convert_uchar4_rtz(double4);\n"
31455"uchar4 __ovld __cnfn convert_uchar4_sat(double4);\n"
31456"uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);\n"
31457"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);\n"
31458"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);\n"
31459"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);\n"
31460"uchar8 __ovld __cnfn convert_uchar8(double8);\n"
31461"uchar8 __ovld __cnfn convert_uchar8_rte(double8);\n"
31462"uchar8 __ovld __cnfn convert_uchar8_rtn(double8);\n"
31463"uchar8 __ovld __cnfn convert_uchar8_rtp(double8);\n"
31464"uchar8 __ovld __cnfn convert_uchar8_rtz(double8);\n"
31465"uchar8 __ovld __cnfn convert_uchar8_sat(double8);\n"
31466"uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);\n"
31467"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);\n"
31468"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);\n"
31469"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);\n"
31470"uchar16 __ovld __cnfn convert_uchar16(double16);\n"
31471"uchar16 __ovld __cnfn convert_uchar16_rte(double16);\n"
31472"uchar16 __ovld __cnfn convert_uchar16_rtn(double16);\n"
31473"uchar16 __ovld __cnfn convert_uchar16_rtp(double16);\n"
31474"uchar16 __ovld __cnfn convert_uchar16_rtz(double16);\n"
31475"uchar16 __ovld __cnfn convert_uchar16_sat(double16);\n"
31476"uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);\n"
31477"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);\n"
31478"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);\n"
31479"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);\n"
31480"\n"
31481"short __ovld __cnfn convert_short(double);\n"
31482"short __ovld __cnfn convert_short_rte(double);\n"
31483"short __ovld __cnfn convert_short_rtn(double);\n"
31484"short __ovld __cnfn convert_short_rtp(double);\n"
31485"short __ovld __cnfn convert_short_rtz(double);\n"
31486"short __ovld __cnfn convert_short_sat(double);\n"
31487"short __ovld __cnfn convert_short_sat_rte(double);\n"
31488"short __ovld __cnfn convert_short_sat_rtn(double);\n"
31489"short __ovld __cnfn convert_short_sat_rtp(double);\n"
31490"short __ovld __cnfn convert_short_sat_rtz(double);\n"
31491"short2 __ovld __cnfn convert_short2(double2);\n"
31492"short2 __ovld __cnfn convert_short2_rte(double2);\n"
31493"short2 __ovld __cnfn convert_short2_rtn(double2);\n"
31494"short2 __ovld __cnfn convert_short2_rtp(double2);\n"
31495"short2 __ovld __cnfn convert_short2_rtz(double2);\n"
31496"short2 __ovld __cnfn convert_short2_sat(double2);\n"
31497"short2 __ovld __cnfn convert_short2_sat_rte(double2);\n"
31498"short2 __ovld __cnfn convert_short2_sat_rtn(double2);\n"
31499"short2 __ovld __cnfn convert_short2_sat_rtp(double2);\n"
31500"short2 __ovld __cnfn convert_short2_sat_rtz(double2);\n"
31501"short3 __ovld __cnfn convert_short3(double3);\n"
31502"short3 __ovld __cnfn convert_short3_rte(double3);\n"
31503"short3 __ovld __cnfn convert_short3_rtn(double3);\n"
31504"short3 __ovld __cnfn convert_short3_rtp(double3);\n"
31505"short3 __ovld __cnfn convert_short3_rtz(double3);\n"
31506"short3 __ovld __cnfn convert_short3_sat(double3);\n"
31507"short3 __ovld __cnfn convert_short3_sat_rte(double3);\n"
31508"short3 __ovld __cnfn convert_short3_sat_rtn(double3);\n"
31509"short3 __ovld __cnfn convert_short3_sat_rtp(double3);\n"
31510"short3 __ovld __cnfn convert_short3_sat_rtz(double3);\n"
31511"short4 __ovld __cnfn convert_short4(double4);\n"
31512"short4 __ovld __cnfn convert_short4_rte(double4);\n"
31513"short4 __ovld __cnfn convert_short4_rtn(double4);\n"
31514"short4 __ovld __cnfn convert_short4_rtp(double4);\n"
31515"short4 __ovld __cnfn convert_short4_rtz(double4);\n"
31516"short4 __ovld __cnfn convert_short4_sat(double4);\n"
31517"short4 __ovld __cnfn convert_short4_sat_rte(double4);\n"
31518"short4 __ovld __cnfn convert_short4_sat_rtn(double4);\n"
31519"short4 __ovld __cnfn convert_short4_sat_rtp(double4);\n"
31520"short4 __ovld __cnfn convert_short4_sat_rtz(double4);\n"
31521"short8 __ovld __cnfn convert_short8(double8);\n"
31522"short8 __ovld __cnfn convert_short8_rte(double8);\n"
31523"short8 __ovld __cnfn convert_short8_rtn(double8);\n"
31524"short8 __ovld __cnfn convert_short8_rtp(double8);\n"
31525"short8 __ovld __cnfn convert_short8_rtz(double8);\n"
31526"short8 __ovld __cnfn convert_short8_sat(double8);\n"
31527"short8 __ovld __cnfn convert_short8_sat_rte(double8);\n"
31528"short8 __ovld __cnfn convert_short8_sat_rtn(double8);\n"
31529"short8 __ovld __cnfn convert_short8_sat_rtp(double8);\n"
31530"short8 __ovld __cnfn convert_short8_sat_rtz(double8);\n"
31531"short16 __ovld __cnfn convert_short16(double16);\n"
31532"short16 __ovld __cnfn convert_short16_rte(double16);\n"
31533"short16 __ovld __cnfn convert_short16_rtn(double16);\n"
31534"short16 __ovld __cnfn convert_short16_rtp(double16);\n"
31535"short16 __ovld __cnfn convert_short16_rtz(double16);\n"
31536"short16 __ovld __cnfn convert_short16_sat(double16);\n"
31537"short16 __ovld __cnfn convert_short16_sat_rte(double16);\n"
31538"short16 __ovld __cnfn convert_short16_sat_rtn(double16);\n"
31539"short16 __ovld __cnfn convert_short16_sat_rtp(double16);\n"
31540"short16 __ovld __cnfn convert_short16_sat_rtz(double16);\n"
31541"\n"
31542"ushort __ovld __cnfn convert_ushort(double);\n"
31543"ushort __ovld __cnfn convert_ushort_rte(double);\n"
31544"ushort __ovld __cnfn convert_ushort_rtn(double);\n"
31545"ushort __ovld __cnfn convert_ushort_rtp(double);\n"
31546"ushort __ovld __cnfn convert_ushort_rtz(double);\n"
31547"ushort __ovld __cnfn convert_ushort_sat(double);\n"
31548"ushort __ovld __cnfn convert_ushort_sat_rte(double);\n"
31549"ushort __ovld __cnfn convert_ushort_sat_rtn(double);\n"
31550"ushort __ovld __cnfn convert_ushort_sat_rtp(double);\n"
31551"ushort __ovld __cnfn convert_ushort_sat_rtz(double);\n"
31552"ushort2 __ovld __cnfn convert_ushort2(double2);\n"
31553"ushort2 __ovld __cnfn convert_ushort2_rte(double2);\n"
31554"ushort2 __ovld __cnfn convert_ushort2_rtn(double2);\n"
31555"ushort2 __ovld __cnfn convert_ushort2_rtp(double2);\n"
31556"ushort2 __ovld __cnfn convert_ushort2_rtz(double2);\n"
31557"ushort2 __ovld __cnfn convert_ushort2_sat(double2);\n"
31558"ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);\n"
31559"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);\n"
31560"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);\n"
31561"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);\n"
31562"ushort3 __ovld __cnfn convert_ushort3(double3);\n"
31563"ushort3 __ovld __cnfn convert_ushort3_rte(double3);\n"
31564"ushort3 __ovld __cnfn convert_ushort3_rtn(double3);\n"
31565"ushort3 __ovld __cnfn convert_ushort3_rtp(double3);\n"
31566"ushort3 __ovld __cnfn convert_ushort3_rtz(double3);\n"
31567"ushort3 __ovld __cnfn convert_ushort3_sat(double3);\n"
31568"ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);\n"
31569"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);\n"
31570"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);\n"
31571"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);\n"
31572"ushort4 __ovld __cnfn convert_ushort4(double4);\n"
31573"ushort4 __ovld __cnfn convert_ushort4_rte(double4);\n"
31574"ushort4 __ovld __cnfn convert_ushort4_rtn(double4);\n"
31575"ushort4 __ovld __cnfn convert_ushort4_rtp(double4);\n"
31576"ushort4 __ovld __cnfn convert_ushort4_rtz(double4);\n"
31577"ushort4 __ovld __cnfn convert_ushort4_sat(double4);\n"
31578"ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);\n"
31579"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);\n"
31580"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);\n"
31581"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);\n"
31582"ushort8 __ovld __cnfn convert_ushort8(double8);\n"
31583"ushort8 __ovld __cnfn convert_ushort8_rte(double8);\n"
31584"ushort8 __ovld __cnfn convert_ushort8_rtn(double8);\n"
31585"ushort8 __ovld __cnfn convert_ushort8_rtp(double8);\n"
31586"ushort8 __ovld __cnfn convert_ushort8_rtz(double8);\n"
31587"ushort8 __ovld __cnfn convert_ushort8_sat(double8);\n"
31588"ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);\n"
31589"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);\n"
31590"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);\n"
31591"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);\n"
31592"ushort16 __ovld __cnfn convert_ushort16(double16);\n"
31593"ushort16 __ovld __cnfn convert_ushort16_rte(double16);\n"
31594"ushort16 __ovld __cnfn convert_ushort16_rtn(double16);\n"
31595"ushort16 __ovld __cnfn convert_ushort16_rtp(double16);\n"
31596"ushort16 __ovld __cnfn convert_ushort16_rtz(double16);\n"
31597"ushort16 __ovld __cnfn convert_ushort16_sat(double16);\n"
31598"ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);\n"
31599"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);\n"
31600"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);\n"
31601"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);\n"
31602"\n"
31603"int __ovld __cnfn convert_int(double);\n"
31604"int __ovld __cnfn convert_int_rte(double);\n"
31605"int __ovld __cnfn convert_int_rtn(double);\n"
31606"int __ovld __cnfn convert_int_rtp(double);\n"
31607"int __ovld __cnfn convert_int_rtz(double);\n"
31608"int __ovld __cnfn convert_int_sat(double);\n"
31609"int __ovld __cnfn convert_int_sat_rte(double);\n"
31610"int __ovld __cnfn convert_int_sat_rtn(double);\n"
31611"int __ovld __cnfn convert_int_sat_rtp(double);\n"
31612"int __ovld __cnfn convert_int_sat_rtz(double);\n"
31613"int2 __ovld __cnfn convert_int2(double2);\n"
31614"int2 __ovld __cnfn convert_int2_rte(double2);\n"
31615"int2 __ovld __cnfn convert_int2_rtn(double2);\n"
31616"int2 __ovld __cnfn convert_int2_rtp(double2);\n"
31617"int2 __ovld __cnfn convert_int2_rtz(double2);\n"
31618"int2 __ovld __cnfn convert_int2_sat(double2);\n"
31619"int2 __ovld __cnfn convert_int2_sat_rte(double2);\n"
31620"int2 __ovld __cnfn convert_int2_sat_rtn(double2);\n"
31621"int2 __ovld __cnfn convert_int2_sat_rtp(double2);\n"
31622"int2 __ovld __cnfn convert_int2_sat_rtz(double2);\n"
31623"int3 __ovld __cnfn convert_int3(double3);\n"
31624"int3 __ovld __cnfn convert_int3_rte(double3);\n"
31625"int3 __ovld __cnfn convert_int3_rtn(double3);\n"
31626"int3 __ovld __cnfn convert_int3_rtp(double3);\n"
31627"int3 __ovld __cnfn convert_int3_rtz(double3);\n"
31628"int3 __ovld __cnfn convert_int3_sat(double3);\n"
31629"int3 __ovld __cnfn convert_int3_sat_rte(double3);\n"
31630"int3 __ovld __cnfn convert_int3_sat_rtn(double3);\n"
31631"int3 __ovld __cnfn convert_int3_sat_rtp(double3);\n"
31632"int3 __ovld __cnfn convert_int3_sat_rtz(double3);\n"
31633"int4 __ovld __cnfn convert_int4(double4);\n"
31634"int4 __ovld __cnfn convert_int4_rte(double4);\n"
31635"int4 __ovld __cnfn convert_int4_rtn(double4);\n"
31636"int4 __ovld __cnfn convert_int4_rtp(double4);\n"
31637"int4 __ovld __cnfn convert_int4_rtz(double4);\n"
31638"int4 __ovld __cnfn convert_int4_sat(double4);\n"
31639"int4 __ovld __cnfn convert_int4_sat_rte(double4);\n"
31640"int4 __ovld __cnfn convert_int4_sat_rtn(double4);\n"
31641"int4 __ovld __cnfn convert_int4_sat_rtp(double4);\n"
31642"int4 __ovld __cnfn convert_int4_sat_rtz(double4);\n"
31643"int8 __ovld __cnfn convert_int8(double8);\n"
31644"int8 __ovld __cnfn convert_int8_rte(double8);\n"
31645"int8 __ovld __cnfn convert_int8_rtn(double8);\n"
31646"int8 __ovld __cnfn convert_int8_rtp(double8);\n"
31647"int8 __ovld __cnfn convert_int8_rtz(double8);\n"
31648"int8 __ovld __cnfn convert_int8_sat(double8);\n"
31649"int8 __ovld __cnfn convert_int8_sat_rte(double8);\n"
31650"int8 __ovld __cnfn convert_int8_sat_rtn(double8);\n"
31651"int8 __ovld __cnfn convert_int8_sat_rtp(double8);\n"
31652"int8 __ovld __cnfn convert_int8_sat_rtz(double8);\n"
31653"int16 __ovld __cnfn convert_int16(double16);\n"
31654"int16 __ovld __cnfn convert_int16_rte(double16);\n"
31655"int16 __ovld __cnfn convert_int16_rtn(double16);\n"
31656"int16 __ovld __cnfn convert_int16_rtp(double16);\n"
31657"int16 __ovld __cnfn convert_int16_rtz(double16);\n"
31658"int16 __ovld __cnfn convert_int16_sat(double16);\n"
31659"int16 __ovld __cnfn convert_int16_sat_rte(double16);\n"
31660"int16 __ovld __cnfn convert_int16_sat_rtn(double16);\n"
31661"int16 __ovld __cnfn convert_int16_sat_rtp(double16);\n"
31662"int16 __ovld __cnfn convert_int16_sat_rtz(double16);\n"
31663"\n"
31664"uint __ovld __cnfn convert_uint(double);\n"
31665"uint __ovld __cnfn convert_uint_rte(double);\n"
31666"uint __ovld __cnfn convert_uint_rtn(double);\n"
31667"uint __ovld __cnfn convert_uint_rtp(double);\n"
31668"uint __ovld __cnfn convert_uint_rtz(double);\n"
31669"uint __ovld __cnfn convert_uint_sat(double);\n"
31670"uint __ovld __cnfn convert_uint_sat_rte(double);\n"
31671"uint __ovld __cnfn convert_uint_sat_rtn(double);\n"
31672"uint __ovld __cnfn convert_uint_sat_rtp(double);\n"
31673"uint __ovld __cnfn convert_uint_sat_rtz(double);\n"
31674"uint2 __ovld __cnfn convert_uint2(double2);\n"
31675"uint2 __ovld __cnfn convert_uint2_rte(double2);\n"
31676"uint2 __ovld __cnfn convert_uint2_rtn(double2);\n"
31677"uint2 __ovld __cnfn convert_uint2_rtp(double2);\n"
31678"uint2 __ovld __cnfn convert_uint2_rtz(double2);\n"
31679"uint2 __ovld __cnfn convert_uint2_sat(double2);\n"
31680"uint2 __ovld __cnfn convert_uint2_sat_rte(double2);\n"
31681"uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);\n"
31682"uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);\n"
31683"uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);\n"
31684"uint3 __ovld __cnfn convert_uint3(double3);\n"
31685"uint3 __ovld __cnfn convert_uint3_rte(double3);\n"
31686"uint3 __ovld __cnfn convert_uint3_rtn(double3);\n"
31687"uint3 __ovld __cnfn convert_uint3_rtp(double3);\n"
31688"uint3 __ovld __cnfn convert_uint3_rtz(double3);\n"
31689"uint3 __ovld __cnfn convert_uint3_sat(double3);\n"
31690"uint3 __ovld __cnfn convert_uint3_sat_rte(double3);\n"
31691"uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);\n"
31692"uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);\n"
31693"uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);\n"
31694"uint4 __ovld __cnfn convert_uint4(double4);\n"
31695"uint4 __ovld __cnfn convert_uint4_rte(double4);\n"
31696"uint4 __ovld __cnfn convert_uint4_rtn(double4);\n"
31697"uint4 __ovld __cnfn convert_uint4_rtp(double4);\n"
31698"uint4 __ovld __cnfn convert_uint4_rtz(double4);\n"
31699"uint4 __ovld __cnfn convert_uint4_sat(double4);\n"
31700"uint4 __ovld __cnfn convert_uint4_sat_rte(double4);\n"
31701"uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);\n"
31702"uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);\n"
31703"uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);\n"
31704"uint8 __ovld __cnfn convert_uint8(double8);\n"
31705"uint8 __ovld __cnfn convert_uint8_rte(double8);\n"
31706"uint8 __ovld __cnfn convert_uint8_rtn(double8);\n"
31707"uint8 __ovld __cnfn convert_uint8_rtp(double8);\n"
31708"uint8 __ovld __cnfn convert_uint8_rtz(double8);\n"
31709"uint8 __ovld __cnfn convert_uint8_sat(double8);\n"
31710"uint8 __ovld __cnfn convert_uint8_sat_rte(double8);\n"
31711"uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);\n"
31712"uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);\n"
31713"uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);\n"
31714"uint16 __ovld __cnfn convert_uint16(double16);\n"
31715"uint16 __ovld __cnfn convert_uint16_rte(double16);\n"
31716"uint16 __ovld __cnfn convert_uint16_rtn(double16);\n"
31717"uint16 __ovld __cnfn convert_uint16_rtp(double16);\n"
31718"uint16 __ovld __cnfn convert_uint16_rtz(double16);\n"
31719"uint16 __ovld __cnfn convert_uint16_sat(double16);\n"
31720"uint16 __ovld __cnfn convert_uint16_sat_rte(double16);\n"
31721"uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);\n"
31722"uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);\n"
31723"uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);\n"
31724"\n"
31725"long __ovld __cnfn convert_long(double);\n"
31726"long __ovld __cnfn convert_long_rte(double);\n"
31727"long __ovld __cnfn convert_long_rtn(double);\n"
31728"long __ovld __cnfn convert_long_rtp(double);\n"
31729"long __ovld __cnfn convert_long_rtz(double);\n"
31730"long __ovld __cnfn convert_long_sat(double);\n"
31731"long __ovld __cnfn convert_long_sat_rte(double);\n"
31732"long __ovld __cnfn convert_long_sat_rtn(double);\n"
31733"long __ovld __cnfn convert_long_sat_rtp(double);\n"
31734"long __ovld __cnfn convert_long_sat_rtz(double);\n"
31735"long2 __ovld __cnfn convert_long2(double2);\n"
31736"long2 __ovld __cnfn convert_long2_rte(double2);\n"
31737"long2 __ovld __cnfn convert_long2_rtn(double2);\n"
31738"long2 __ovld __cnfn convert_long2_rtp(double2);\n"
31739"long2 __ovld __cnfn convert_long2_rtz(double2);\n"
31740"long2 __ovld __cnfn convert_long2_sat(double2);\n"
31741"long2 __ovld __cnfn convert_long2_sat_rte(double2);\n"
31742"long2 __ovld __cnfn convert_long2_sat_rtn(double2);\n"
31743"long2 __ovld __cnfn convert_long2_sat_rtp(double2);\n"
31744"long2 __ovld __cnfn convert_long2_sat_rtz(double2);\n"
31745"long3 __ovld __cnfn convert_long3(double3);\n"
31746"long3 __ovld __cnfn convert_long3_rte(double3);\n"
31747"long3 __ovld __cnfn convert_long3_rtn(double3);\n"
31748"long3 __ovld __cnfn convert_long3_rtp(double3);\n"
31749"long3 __ovld __cnfn convert_long3_rtz(double3);\n"
31750"long3 __ovld __cnfn convert_long3_sat(double3);\n"
31751"long3 __ovld __cnfn convert_long3_sat_rte(double3);\n"
31752"long3 __ovld __cnfn convert_long3_sat_rtn(double3);\n"
31753"long3 __ovld __cnfn convert_long3_sat_rtp(double3);\n"
31754"long3 __ovld __cnfn convert_long3_sat_rtz(double3);\n"
31755"long4 __ovld __cnfn convert_long4(double4);\n"
31756"long4 __ovld __cnfn convert_long4_rte(double4);\n"
31757"long4 __ovld __cnfn convert_long4_rtn(double4);\n"
31758"long4 __ovld __cnfn convert_long4_rtp(double4);\n"
31759"long4 __ovld __cnfn convert_long4_rtz(double4);\n"
31760"long4 __ovld __cnfn convert_long4_sat(double4);\n"
31761"long4 __ovld __cnfn convert_long4_sat_rte(double4);\n"
31762"long4 __ovld __cnfn convert_long4_sat_rtn(double4);\n"
31763"long4 __ovld __cnfn convert_long4_sat_rtp(double4);\n"
31764"long4 __ovld __cnfn convert_long4_sat_rtz(double4);\n"
31765"long8 __ovld __cnfn convert_long8(double8);\n"
31766"long8 __ovld __cnfn convert_long8_rte(double8);\n"
31767"long8 __ovld __cnfn convert_long8_rtn(double8);\n"
31768"long8 __ovld __cnfn convert_long8_rtp(double8);\n"
31769"long8 __ovld __cnfn convert_long8_rtz(double8);\n"
31770"long8 __ovld __cnfn convert_long8_sat(double8);\n"
31771"long8 __ovld __cnfn convert_long8_sat_rte(double8);\n"
31772"long8 __ovld __cnfn convert_long8_sat_rtn(double8);\n"
31773"long8 __ovld __cnfn convert_long8_sat_rtp(double8);\n"
31774"long8 __ovld __cnfn convert_long8_sat_rtz(double8);\n"
31775"long16 __ovld __cnfn convert_long16(double16);\n"
31776"long16 __ovld __cnfn convert_long16_rte(double16);\n"
31777"long16 __ovld __cnfn convert_long16_rtn(double16);\n"
31778"long16 __ovld __cnfn convert_long16_rtp(double16);\n"
31779"long16 __ovld __cnfn convert_long16_rtz(double16);\n"
31780"long16 __ovld __cnfn convert_long16_sat(double16);\n"
31781"long16 __ovld __cnfn convert_long16_sat_rte(double16);\n"
31782"long16 __ovld __cnfn convert_long16_sat_rtn(double16);\n"
31783"long16 __ovld __cnfn convert_long16_sat_rtp(double16);\n"
31784"long16 __ovld __cnfn convert_long16_sat_rtz(double16);\n"
31785"\n"
31786"ulong __ovld __cnfn convert_ulong(double);\n"
31787"ulong __ovld __cnfn convert_ulong_rte(double);\n"
31788"ulong __ovld __cnfn convert_ulong_rtn(double);\n"
31789"ulong __ovld __cnfn convert_ulong_rtp(double);\n"
31790"ulong __ovld __cnfn convert_ulong_rtz(double);\n"
31791"ulong __ovld __cnfn convert_ulong_sat(double);\n"
31792"ulong __ovld __cnfn convert_ulong_sat_rte(double);\n"
31793"ulong __ovld __cnfn convert_ulong_sat_rtn(double);\n"
31794"ulong __ovld __cnfn convert_ulong_sat_rtp(double);\n"
31795"ulong __ovld __cnfn convert_ulong_sat_rtz(double);\n"
31796"ulong2 __ovld __cnfn convert_ulong2(double2);\n"
31797"ulong2 __ovld __cnfn convert_ulong2_rte(double2);\n"
31798"ulong2 __ovld __cnfn convert_ulong2_rtn(double2);\n"
31799"ulong2 __ovld __cnfn convert_ulong2_rtp(double2);\n"
31800"ulong2 __ovld __cnfn convert_ulong2_rtz(double2);\n"
31801"ulong2 __ovld __cnfn convert_ulong2_sat(double2);\n"
31802"ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);\n"
31803"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);\n"
31804"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);\n"
31805"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);\n"
31806"ulong3 __ovld __cnfn convert_ulong3(double3);\n"
31807"ulong3 __ovld __cnfn convert_ulong3_rte(double3);\n"
31808"ulong3 __ovld __cnfn convert_ulong3_rtn(double3);\n"
31809"ulong3 __ovld __cnfn convert_ulong3_rtp(double3);\n"
31810"ulong3 __ovld __cnfn convert_ulong3_rtz(double3);\n"
31811"ulong3 __ovld __cnfn convert_ulong3_sat(double3);\n"
31812"ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);\n"
31813"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);\n"
31814"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);\n"
31815"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);\n"
31816"ulong4 __ovld __cnfn convert_ulong4(double4);\n"
31817"ulong4 __ovld __cnfn convert_ulong4_rte(double4);\n"
31818"ulong4 __ovld __cnfn convert_ulong4_rtn(double4);\n"
31819"ulong4 __ovld __cnfn convert_ulong4_rtp(double4);\n"
31820"ulong4 __ovld __cnfn convert_ulong4_rtz(double4);\n"
31821"ulong4 __ovld __cnfn convert_ulong4_sat(double4);\n"
31822"ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);\n"
31823"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);\n"
31824"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);\n"
31825"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);\n"
31826"ulong8 __ovld __cnfn convert_ulong8(double8);\n"
31827"ulong8 __ovld __cnfn convert_ulong8_rte(double8);\n"
31828"ulong8 __ovld __cnfn convert_ulong8_rtn(double8);\n"
31829"ulong8 __ovld __cnfn convert_ulong8_rtp(double8);\n"
31830"ulong8 __ovld __cnfn convert_ulong8_rtz(double8);\n"
31831"ulong8 __ovld __cnfn convert_ulong8_sat(double8);\n"
31832"ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);\n"
31833"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);\n"
31834"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);\n"
31835"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);\n"
31836"ulong16 __ovld __cnfn convert_ulong16(double16);\n"
31837"ulong16 __ovld __cnfn convert_ulong16_rte(double16);\n"
31838"ulong16 __ovld __cnfn convert_ulong16_rtn(double16);\n"
31839"ulong16 __ovld __cnfn convert_ulong16_rtp(double16);\n"
31840"ulong16 __ovld __cnfn convert_ulong16_rtz(double16);\n"
31841"ulong16 __ovld __cnfn convert_ulong16_sat(double16);\n"
31842"ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);\n"
31843"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);\n"
31844"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);\n"
31845"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);\n"
31846"\n"
31847"float __ovld __cnfn convert_float(double);\n"
31848"float __ovld __cnfn convert_float_rte(double);\n"
31849"float __ovld __cnfn convert_float_rtn(double);\n"
31850"float __ovld __cnfn convert_float_rtp(double);\n"
31851"float __ovld __cnfn convert_float_rtz(double);\n"
31852"float2 __ovld __cnfn convert_float2(double2);\n"
31853"float2 __ovld __cnfn convert_float2_rte(double2);\n"
31854"float2 __ovld __cnfn convert_float2_rtn(double2);\n"
31855"float2 __ovld __cnfn convert_float2_rtp(double2);\n"
31856"float2 __ovld __cnfn convert_float2_rtz(double2);\n"
31857"float3 __ovld __cnfn convert_float3(double3);\n"
31858"float3 __ovld __cnfn convert_float3_rte(double3);\n"
31859"float3 __ovld __cnfn convert_float3_rtn(double3);\n"
31860"float3 __ovld __cnfn convert_float3_rtp(double3);\n"
31861"float3 __ovld __cnfn convert_float3_rtz(double3);\n"
31862"float4 __ovld __cnfn convert_float4(double4);\n"
31863"float4 __ovld __cnfn convert_float4_rte(double4);\n"
31864"float4 __ovld __cnfn convert_float4_rtn(double4);\n"
31865"float4 __ovld __cnfn convert_float4_rtp(double4);\n"
31866"float4 __ovld __cnfn convert_float4_rtz(double4);\n"
31867"float8 __ovld __cnfn convert_float8(double8);\n"
31868"float8 __ovld __cnfn convert_float8_rte(double8);\n"
31869"float8 __ovld __cnfn convert_float8_rtn(double8);\n"
31870"float8 __ovld __cnfn convert_float8_rtp(double8);\n"
31871"float8 __ovld __cnfn convert_float8_rtz(double8);\n"
31872"float16 __ovld __cnfn convert_float16(double16);\n"
31873"float16 __ovld __cnfn convert_float16_rte(double16);\n"
31874"float16 __ovld __cnfn convert_float16_rtn(double16);\n"
31875"float16 __ovld __cnfn convert_float16_rtp(double16);\n"
31876"float16 __ovld __cnfn convert_float16_rtz(double16);\n"
31877"\n"
31878"double __ovld __cnfn convert_double(char);\n"
31879"double __ovld __cnfn convert_double(double);\n"
31880"double __ovld __cnfn convert_double(float);\n"
31881"double __ovld __cnfn convert_double(int);\n"
31882"double __ovld __cnfn convert_double(long);\n"
31883"double __ovld __cnfn convert_double(short);\n"
31884"double __ovld __cnfn convert_double(uchar);\n"
31885"double __ovld __cnfn convert_double(uint);\n"
31886"double __ovld __cnfn convert_double(ulong);\n"
31887"double __ovld __cnfn convert_double(ushort);\n"
31888"double __ovld __cnfn convert_double_rte(char);\n"
31889"double __ovld __cnfn convert_double_rte(double);\n"
31890"double __ovld __cnfn convert_double_rte(float);\n"
31891"double __ovld __cnfn convert_double_rte(int);\n"
31892"double __ovld __cnfn convert_double_rte(long);\n"
31893"double __ovld __cnfn convert_double_rte(short);\n"
31894"double __ovld __cnfn convert_double_rte(uchar);\n"
31895"double __ovld __cnfn convert_double_rte(uint);\n"
31896"double __ovld __cnfn convert_double_rte(ulong);\n"
31897"double __ovld __cnfn convert_double_rte(ushort);\n"
31898"double __ovld __cnfn convert_double_rtn(char);\n"
31899"double __ovld __cnfn convert_double_rtn(double);\n"
31900"double __ovld __cnfn convert_double_rtn(float);\n"
31901"double __ovld __cnfn convert_double_rtn(int);\n"
31902"double __ovld __cnfn convert_double_rtn(long);\n"
31903"double __ovld __cnfn convert_double_rtn(short);\n"
31904"double __ovld __cnfn convert_double_rtn(uchar);\n"
31905"double __ovld __cnfn convert_double_rtn(uint);\n"
31906"double __ovld __cnfn convert_double_rtn(ulong);\n"
31907"double __ovld __cnfn convert_double_rtn(ushort);\n"
31908"double __ovld __cnfn convert_double_rtp(char);\n"
31909"double __ovld __cnfn convert_double_rtp(double);\n"
31910"double __ovld __cnfn convert_double_rtp(float);\n"
31911"double __ovld __cnfn convert_double_rtp(int);\n"
31912"double __ovld __cnfn convert_double_rtp(long);\n"
31913"double __ovld __cnfn convert_double_rtp(short);\n"
31914"double __ovld __cnfn convert_double_rtp(uchar);\n"
31915"double __ovld __cnfn convert_double_rtp(uint);\n"
31916"double __ovld __cnfn convert_double_rtp(ulong);\n"
31917"double __ovld __cnfn convert_double_rtp(ushort);\n"
31918"double __ovld __cnfn convert_double_rtz(char);\n"
31919"double __ovld __cnfn convert_double_rtz(double);\n"
31920"double __ovld __cnfn convert_double_rtz(float);\n"
31921"double __ovld __cnfn convert_double_rtz(int);\n"
31922"double __ovld __cnfn convert_double_rtz(long);\n"
31923"double __ovld __cnfn convert_double_rtz(short);\n"
31924"double __ovld __cnfn convert_double_rtz(uchar);\n"
31925"double __ovld __cnfn convert_double_rtz(uint);\n"
31926"double __ovld __cnfn convert_double_rtz(ulong);\n"
31927"double __ovld __cnfn convert_double_rtz(ushort);\n"
31928"double2 __ovld __cnfn convert_double2(char2);\n"
31929"double2 __ovld __cnfn convert_double2(double2);\n"
31930"double2 __ovld __cnfn convert_double2(float2);\n"
31931"double2 __ovld __cnfn convert_double2(int2);\n"
31932"double2 __ovld __cnfn convert_double2(long2);\n"
31933"double2 __ovld __cnfn convert_double2(short2);\n"
31934"double2 __ovld __cnfn convert_double2(uchar2);\n"
31935"double2 __ovld __cnfn convert_double2(uint2);\n"
31936"double2 __ovld __cnfn convert_double2(ulong2);\n"
31937"double2 __ovld __cnfn convert_double2(ushort2);\n"
31938"double2 __ovld __cnfn convert_double2_rte(char2);\n"
31939"double2 __ovld __cnfn convert_double2_rte(double2);\n"
31940"double2 __ovld __cnfn convert_double2_rte(float2);\n"
31941"double2 __ovld __cnfn convert_double2_rte(int2);\n"
31942"double2 __ovld __cnfn convert_double2_rte(long2);\n"
31943"double2 __ovld __cnfn convert_double2_rte(short2);\n"
31944"double2 __ovld __cnfn convert_double2_rte(uchar2);\n"
31945"double2 __ovld __cnfn convert_double2_rte(uint2);\n"
31946"double2 __ovld __cnfn convert_double2_rte(ulong2);\n"
31947"double2 __ovld __cnfn convert_double2_rte(ushort2);\n"
31948"double2 __ovld __cnfn convert_double2_rtn(char2);\n"
31949"double2 __ovld __cnfn convert_double2_rtn(double2);\n"
31950"double2 __ovld __cnfn convert_double2_rtn(float2);\n"
31951"double2 __ovld __cnfn convert_double2_rtn(int2);\n"
31952"double2 __ovld __cnfn convert_double2_rtn(long2);\n"
31953"double2 __ovld __cnfn convert_double2_rtn(short2);\n"
31954"double2 __ovld __cnfn convert_double2_rtn(uchar2);\n"
31955"double2 __ovld __cnfn convert_double2_rtn(uint2);\n"
31956"double2 __ovld __cnfn convert_double2_rtn(ulong2);\n"
31957"double2 __ovld __cnfn convert_double2_rtn(ushort2);\n"
31958"double2 __ovld __cnfn convert_double2_rtp(char2);\n"
31959"double2 __ovld __cnfn convert_double2_rtp(double2);\n"
31960"double2 __ovld __cnfn convert_double2_rtp(float2);\n"
31961"double2 __ovld __cnfn convert_double2_rtp(int2);\n"
31962"double2 __ovld __cnfn convert_double2_rtp(long2);\n"
31963"double2 __ovld __cnfn convert_double2_rtp(short2);\n"
31964"double2 __ovld __cnfn convert_double2_rtp(uchar2);\n"
31965"double2 __ovld __cnfn convert_double2_rtp(uint2);\n"
31966"double2 __ovld __cnfn convert_double2_rtp(ulong2);\n"
31967"double2 __ovld __cnfn convert_double2_rtp(ushort2);\n"
31968"double2 __ovld __cnfn convert_double2_rtz(char2);\n"
31969"double2 __ovld __cnfn convert_double2_rtz(double2);\n"
31970"double2 __ovld __cnfn convert_double2_rtz(float2);\n"
31971"double2 __ovld __cnfn convert_double2_rtz(int2);\n"
31972"double2 __ovld __cnfn convert_double2_rtz(long2);\n"
31973"double2 __ovld __cnfn convert_double2_rtz(short2);\n"
31974"double2 __ovld __cnfn convert_double2_rtz(uchar2);\n"
31975"double2 __ovld __cnfn convert_double2_rtz(uint2);\n"
31976"double2 __ovld __cnfn convert_double2_rtz(ulong2);\n"
31977"double2 __ovld __cnfn convert_double2_rtz(ushort2);\n"
31978"double3 __ovld __cnfn convert_double3(char3);\n"
31979"double3 __ovld __cnfn convert_double3(double3);\n"
31980"double3 __ovld __cnfn convert_double3(float3);\n"
31981"double3 __ovld __cnfn convert_double3(int3);\n"
31982"double3 __ovld __cnfn convert_double3(long3);\n"
31983"double3 __ovld __cnfn convert_double3(short3);\n"
31984"double3 __ovld __cnfn convert_double3(uchar3);\n"
31985"double3 __ovld __cnfn convert_double3(uint3);\n"
31986"double3 __ovld __cnfn convert_double3(ulong3);\n"
31987"double3 __ovld __cnfn convert_double3(ushort3);\n"
31988"double3 __ovld __cnfn convert_double3_rte(char3);\n"
31989"double3 __ovld __cnfn convert_double3_rte(double3);\n"
31990"double3 __ovld __cnfn convert_double3_rte(float3);\n"
31991"double3 __ovld __cnfn convert_double3_rte(int3);\n"
31992"double3 __ovld __cnfn convert_double3_rte(long3);\n"
31993"double3 __ovld __cnfn convert_double3_rte(short3);\n"
31994"double3 __ovld __cnfn convert_double3_rte(uchar3);\n"
31995"double3 __ovld __cnfn convert_double3_rte(uint3);\n"
31996"double3 __ovld __cnfn convert_double3_rte(ulong3);\n"
31997"double3 __ovld __cnfn convert_double3_rte(ushort3);\n"
31998"double3 __ovld __cnfn convert_double3_rtn(char3);\n"
31999"double3 __ovld __cnfn convert_double3_rtn(double3);\n"
32000"double3 __ovld __cnfn convert_double3_rtn(float3);\n"
32001"double3 __ovld __cnfn convert_double3_rtn(int3);\n"
32002"double3 __ovld __cnfn convert_double3_rtn(long3);\n"
32003"double3 __ovld __cnfn convert_double3_rtn(short3);\n"
32004"double3 __ovld __cnfn convert_double3_rtn(uchar3);\n"
32005"double3 __ovld __cnfn convert_double3_rtn(uint3);\n"
32006"double3 __ovld __cnfn convert_double3_rtn(ulong3);\n"
32007"double3 __ovld __cnfn convert_double3_rtn(ushort3);\n"
32008"double3 __ovld __cnfn convert_double3_rtp(char3);\n"
32009"double3 __ovld __cnfn convert_double3_rtp(double3);\n"
32010"double3 __ovld __cnfn convert_double3_rtp(float3);\n"
32011"double3 __ovld __cnfn convert_double3_rtp(int3);\n"
32012"double3 __ovld __cnfn convert_double3_rtp(long3);\n"
32013"double3 __ovld __cnfn convert_double3_rtp(short3);\n"
32014"double3 __ovld __cnfn convert_double3_rtp(uchar3);\n"
32015"double3 __ovld __cnfn convert_double3_rtp(uint3);\n"
32016"double3 __ovld __cnfn convert_double3_rtp(ulong3);\n"
32017"double3 __ovld __cnfn convert_double3_rtp(ushort3);\n"
32018"double3 __ovld __cnfn convert_double3_rtz(char3);\n"
32019"double3 __ovld __cnfn convert_double3_rtz(double3);\n"
32020"double3 __ovld __cnfn convert_double3_rtz(float3);\n"
32021"double3 __ovld __cnfn convert_double3_rtz(int3);\n"
32022"double3 __ovld __cnfn convert_double3_rtz(long3);\n"
32023"double3 __ovld __cnfn convert_double3_rtz(short3);\n"
32024"double3 __ovld __cnfn convert_double3_rtz(uchar3);\n"
32025"double3 __ovld __cnfn convert_double3_rtz(uint3);\n"
32026"double3 __ovld __cnfn convert_double3_rtz(ulong3);\n"
32027"double3 __ovld __cnfn convert_double3_rtz(ushort3);\n"
32028"double4 __ovld __cnfn convert_double4(char4);\n"
32029"double4 __ovld __cnfn convert_double4(double4);\n"
32030"double4 __ovld __cnfn convert_double4(float4);\n"
32031"double4 __ovld __cnfn convert_double4(int4);\n"
32032"double4 __ovld __cnfn convert_double4(long4);\n"
32033"double4 __ovld __cnfn convert_double4(short4);\n"
32034"double4 __ovld __cnfn convert_double4(uchar4);\n"
32035"double4 __ovld __cnfn convert_double4(uint4);\n"
32036"double4 __ovld __cnfn convert_double4(ulong4);\n"
32037"double4 __ovld __cnfn convert_double4(ushort4);\n"
32038"double4 __ovld __cnfn convert_double4_rte(char4);\n"
32039"double4 __ovld __cnfn convert_double4_rte(double4);\n"
32040"double4 __ovld __cnfn convert_double4_rte(float4);\n"
32041"double4 __ovld __cnfn convert_double4_rte(int4);\n"
32042"double4 __ovld __cnfn convert_double4_rte(long4);\n"
32043"double4 __ovld __cnfn convert_double4_rte(short4);\n"
32044"double4 __ovld __cnfn convert_double4_rte(uchar4);\n"
32045"double4 __ovld __cnfn convert_double4_rte(uint4);\n"
32046"double4 __ovld __cnfn convert_double4_rte(ulong4);\n"
32047"double4 __ovld __cnfn convert_double4_rte(ushort4);\n"
32048"double4 __ovld __cnfn convert_double4_rtn(char4);\n"
32049"double4 __ovld __cnfn convert_double4_rtn(double4);\n"
32050"double4 __ovld __cnfn convert_double4_rtn(float4);\n"
32051"double4 __ovld __cnfn convert_double4_rtn(int4);\n"
32052"double4 __ovld __cnfn convert_double4_rtn(long4);\n"
32053"double4 __ovld __cnfn convert_double4_rtn(short4);\n"
32054"double4 __ovld __cnfn convert_double4_rtn(uchar4);\n"
32055"double4 __ovld __cnfn convert_double4_rtn(uint4);\n"
32056"double4 __ovld __cnfn convert_double4_rtn(ulong4);\n"
32057"double4 __ovld __cnfn convert_double4_rtn(ushort4);\n"
32058"double4 __ovld __cnfn convert_double4_rtp(char4);\n"
32059"double4 __ovld __cnfn convert_double4_rtp(double4);\n"
32060"double4 __ovld __cnfn convert_double4_rtp(float4);\n"
32061"double4 __ovld __cnfn convert_double4_rtp(int4);\n"
32062"double4 __ovld __cnfn convert_double4_rtp(long4);\n"
32063"double4 __ovld __cnfn convert_double4_rtp(short4);\n"
32064"double4 __ovld __cnfn convert_double4_rtp(uchar4);\n"
32065"double4 __ovld __cnfn convert_double4_rtp(uint4);\n"
32066"double4 __ovld __cnfn convert_double4_rtp(ulong4);\n"
32067"double4 __ovld __cnfn convert_double4_rtp(ushort4);\n"
32068"double4 __ovld __cnfn convert_double4_rtz(char4);\n"
32069"double4 __ovld __cnfn convert_double4_rtz(double4);\n"
32070"double4 __ovld __cnfn convert_double4_rtz(float4);\n"
32071"double4 __ovld __cnfn convert_double4_rtz(int4);\n"
32072"double4 __ovld __cnfn convert_double4_rtz(long4);\n"
32073"double4 __ovld __cnfn convert_double4_rtz(short4);\n"
32074"double4 __ovld __cnfn convert_double4_rtz(uchar4);\n"
32075"double4 __ovld __cnfn convert_double4_rtz(uint4);\n"
32076"double4 __ovld __cnfn convert_double4_rtz(ulong4);\n"
32077"double4 __ovld __cnfn convert_double4_rtz(ushort4);\n"
32078"double8 __ovld __cnfn convert_double8(char8);\n"
32079"double8 __ovld __cnfn convert_double8(double8);\n"
32080"double8 __ovld __cnfn convert_double8(float8);\n"
32081"double8 __ovld __cnfn convert_double8(int8);\n"
32082"double8 __ovld __cnfn convert_double8(long8);\n"
32083"double8 __ovld __cnfn convert_double8(short8);\n"
32084"double8 __ovld __cnfn convert_double8(uchar8);\n"
32085"double8 __ovld __cnfn convert_double8(uint8);\n"
32086"double8 __ovld __cnfn convert_double8(ulong8);\n"
32087"double8 __ovld __cnfn convert_double8(ushort8);\n"
32088"double8 __ovld __cnfn convert_double8_rte(char8);\n"
32089"double8 __ovld __cnfn convert_double8_rte(double8);\n"
32090"double8 __ovld __cnfn convert_double8_rte(float8);\n"
32091"double8 __ovld __cnfn convert_double8_rte(int8);\n"
32092"double8 __ovld __cnfn convert_double8_rte(long8);\n"
32093"double8 __ovld __cnfn convert_double8_rte(short8);\n"
32094"double8 __ovld __cnfn convert_double8_rte(uchar8);\n"
32095"double8 __ovld __cnfn convert_double8_rte(uint8);\n"
32096"double8 __ovld __cnfn convert_double8_rte(ulong8);\n"
32097"double8 __ovld __cnfn convert_double8_rte(ushort8);\n"
32098"double8 __ovld __cnfn convert_double8_rtn(char8);\n"
32099"double8 __ovld __cnfn convert_double8_rtn(double8);\n"
32100"double8 __ovld __cnfn convert_double8_rtn(float8);\n"
32101"double8 __ovld __cnfn convert_double8_rtn(int8);\n"
32102"double8 __ovld __cnfn convert_double8_rtn(long8);\n"
32103"double8 __ovld __cnfn convert_double8_rtn(short8);\n"
32104"double8 __ovld __cnfn convert_double8_rtn(uchar8);\n"
32105"double8 __ovld __cnfn convert_double8_rtn(uint8);\n"
32106"double8 __ovld __cnfn convert_double8_rtn(ulong8);\n"
32107"double8 __ovld __cnfn convert_double8_rtn(ushort8);\n"
32108"double8 __ovld __cnfn convert_double8_rtp(char8);\n"
32109"double8 __ovld __cnfn convert_double8_rtp(double8);\n"
32110"double8 __ovld __cnfn convert_double8_rtp(float8);\n"
32111"double8 __ovld __cnfn convert_double8_rtp(int8);\n"
32112"double8 __ovld __cnfn convert_double8_rtp(long8);\n"
32113"double8 __ovld __cnfn convert_double8_rtp(short8);\n"
32114"double8 __ovld __cnfn convert_double8_rtp(uchar8);\n"
32115"double8 __ovld __cnfn convert_double8_rtp(uint8);\n"
32116"double8 __ovld __cnfn convert_double8_rtp(ulong8);\n"
32117"double8 __ovld __cnfn convert_double8_rtp(ushort8);\n"
32118"double8 __ovld __cnfn convert_double8_rtz(char8);\n"
32119"double8 __ovld __cnfn convert_double8_rtz(double8);\n"
32120"double8 __ovld __cnfn convert_double8_rtz(float8);\n"
32121"double8 __ovld __cnfn convert_double8_rtz(int8);\n"
32122"double8 __ovld __cnfn convert_double8_rtz(long8);\n"
32123"double8 __ovld __cnfn convert_double8_rtz(short8);\n"
32124"double8 __ovld __cnfn convert_double8_rtz(uchar8);\n"
32125"double8 __ovld __cnfn convert_double8_rtz(uint8);\n"
32126"double8 __ovld __cnfn convert_double8_rtz(ulong8);\n"
32127"double8 __ovld __cnfn convert_double8_rtz(ushort8);\n"
32128"double16 __ovld __cnfn convert_double16(char16);\n"
32129"double16 __ovld __cnfn convert_double16(double16);\n"
32130"double16 __ovld __cnfn convert_double16(float16);\n"
32131"double16 __ovld __cnfn convert_double16(int16);\n"
32132"double16 __ovld __cnfn convert_double16(long16);\n"
32133"double16 __ovld __cnfn convert_double16(short16);\n"
32134"double16 __ovld __cnfn convert_double16(uchar16);\n"
32135"double16 __ovld __cnfn convert_double16(uint16);\n"
32136"double16 __ovld __cnfn convert_double16(ulong16);\n"
32137"double16 __ovld __cnfn convert_double16(ushort16);\n"
32138"double16 __ovld __cnfn convert_double16_rte(char16);\n"
32139"double16 __ovld __cnfn convert_double16_rte(double16);\n"
32140"double16 __ovld __cnfn convert_double16_rte(float16);\n"
32141"double16 __ovld __cnfn convert_double16_rte(int16);\n"
32142"double16 __ovld __cnfn convert_double16_rte(long16);\n"
32143"double16 __ovld __cnfn convert_double16_rte(short16);\n"
32144"double16 __ovld __cnfn convert_double16_rte(uchar16);\n"
32145"double16 __ovld __cnfn convert_double16_rte(uint16);\n"
32146"double16 __ovld __cnfn convert_double16_rte(ulong16);\n"
32147"double16 __ovld __cnfn convert_double16_rte(ushort16);\n"
32148"double16 __ovld __cnfn convert_double16_rtn(char16);\n"
32149"double16 __ovld __cnfn convert_double16_rtn(double16);\n"
32150"double16 __ovld __cnfn convert_double16_rtn(float16);\n"
32151"double16 __ovld __cnfn convert_double16_rtn(int16);\n"
32152"double16 __ovld __cnfn convert_double16_rtn(long16);\n"
32153"double16 __ovld __cnfn convert_double16_rtn(short16);\n"
32154"double16 __ovld __cnfn convert_double16_rtn(uchar16);\n"
32155"double16 __ovld __cnfn convert_double16_rtn(uint16);\n"
32156"double16 __ovld __cnfn convert_double16_rtn(ulong16);\n"
32157"double16 __ovld __cnfn convert_double16_rtn(ushort16);\n"
32158"double16 __ovld __cnfn convert_double16_rtp(char16);\n"
32159"double16 __ovld __cnfn convert_double16_rtp(double16);\n"
32160"double16 __ovld __cnfn convert_double16_rtp(float16);\n"
32161"double16 __ovld __cnfn convert_double16_rtp(int16);\n"
32162"double16 __ovld __cnfn convert_double16_rtp(long16);\n"
32163"double16 __ovld __cnfn convert_double16_rtp(short16);\n"
32164"double16 __ovld __cnfn convert_double16_rtp(uchar16);\n"
32165"double16 __ovld __cnfn convert_double16_rtp(uint16);\n"
32166"double16 __ovld __cnfn convert_double16_rtp(ulong16);\n"
32167"double16 __ovld __cnfn convert_double16_rtp(ushort16);\n"
32168"double16 __ovld __cnfn convert_double16_rtz(char16);\n"
32169"double16 __ovld __cnfn convert_double16_rtz(double16);\n"
32170"double16 __ovld __cnfn convert_double16_rtz(float16);\n"
32171"double16 __ovld __cnfn convert_double16_rtz(int16);\n"
32172"double16 __ovld __cnfn convert_double16_rtz(long16);\n"
32173"double16 __ovld __cnfn convert_double16_rtz(short16);\n"
32174"double16 __ovld __cnfn convert_double16_rtz(uchar16);\n"
32175"double16 __ovld __cnfn convert_double16_rtz(uint16);\n"
32176"double16 __ovld __cnfn convert_double16_rtz(ulong16);\n"
32177"double16 __ovld __cnfn convert_double16_rtz(ushort16);\n"
32178"#endif //cl_khr_fp64\n"
32179"\n"
32180"#ifdef cl_khr_fp16\n"
32181"// Convert half types to non-double types.\n"
32182"uchar __ovld __cnfn convert_uchar(half);\n"
32183"uchar __ovld __cnfn convert_uchar_rte(half);\n"
32184"uchar __ovld __cnfn convert_uchar_rtp(half);\n"
32185"uchar __ovld __cnfn convert_uchar_rtn(half);\n"
32186"uchar __ovld __cnfn convert_uchar_rtz(half);\n"
32187"uchar __ovld __cnfn convert_uchar_sat(half);\n"
32188"uchar __ovld __cnfn convert_uchar_sat_rte(half);\n"
32189"uchar __ovld __cnfn convert_uchar_sat_rtp(half);\n"
32190"uchar __ovld __cnfn convert_uchar_sat_rtn(half);\n"
32191"uchar __ovld __cnfn convert_uchar_sat_rtz(half);\n"
32192"uchar2 __ovld __cnfn convert_uchar2(half2);\n"
32193"uchar2 __ovld __cnfn convert_uchar2_rte(half2);\n"
32194"uchar2 __ovld __cnfn convert_uchar2_rtp(half2);\n"
32195"uchar2 __ovld __cnfn convert_uchar2_rtn(half2);\n"
32196"uchar2 __ovld __cnfn convert_uchar2_rtz(half2);\n"
32197"uchar2 __ovld __cnfn convert_uchar2_sat(half2);\n"
32198"uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);\n"
32199"uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);\n"
32200"uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);\n"
32201"uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);\n"
32202"uchar3 __ovld __cnfn convert_uchar3(half3);\n"
32203"uchar3 __ovld __cnfn convert_uchar3_rte(half3);\n"
32204"uchar3 __ovld __cnfn convert_uchar3_rtp(half3);\n"
32205"uchar3 __ovld __cnfn convert_uchar3_rtn(half3);\n"
32206"uchar3 __ovld __cnfn convert_uchar3_rtz(half3);\n"
32207"uchar3 __ovld __cnfn convert_uchar3_sat(half3);\n"
32208"uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);\n"
32209"uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);\n"
32210"uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);\n"
32211"uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);\n"
32212"uchar4 __ovld __cnfn convert_uchar4(half4);\n"
32213"uchar4 __ovld __cnfn convert_uchar4_rte(half4);\n"
32214"uchar4 __ovld __cnfn convert_uchar4_rtp(half4);\n"
32215"uchar4 __ovld __cnfn convert_uchar4_rtn(half4);\n"
32216"uchar4 __ovld __cnfn convert_uchar4_rtz(half4);\n"
32217"uchar4 __ovld __cnfn convert_uchar4_sat(half4);\n"
32218"uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);\n"
32219"uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);\n"
32220"uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);\n"
32221"uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);\n"
32222"uchar8 __ovld __cnfn convert_uchar8(half8);\n"
32223"uchar8 __ovld __cnfn convert_uchar8_rte(half8);\n"
32224"uchar8 __ovld __cnfn convert_uchar8_rtp(half8);\n"
32225"uchar8 __ovld __cnfn convert_uchar8_rtn(half8);\n"
32226"uchar8 __ovld __cnfn convert_uchar8_rtz(half8);\n"
32227"uchar8 __ovld __cnfn convert_uchar8_sat(half8);\n"
32228"uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);\n"
32229"uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);\n"
32230"uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);\n"
32231"uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);\n"
32232"uchar16 __ovld __cnfn convert_uchar16(half16);\n"
32233"uchar16 __ovld __cnfn convert_uchar16_rte(half16);\n"
32234"uchar16 __ovld __cnfn convert_uchar16_rtp(half16);\n"
32235"uchar16 __ovld __cnfn convert_uchar16_rtn(half16);\n"
32236"uchar16 __ovld __cnfn convert_uchar16_rtz(half16);\n"
32237"uchar16 __ovld __cnfn convert_uchar16_sat(half16);\n"
32238"uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);\n"
32239"uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);\n"
32240"uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);\n"
32241"uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);\n"
32242"ushort __ovld __cnfn convert_ushort(half);\n"
32243"ushort __ovld __cnfn convert_ushort_rte(half);\n"
32244"ushort __ovld __cnfn convert_ushort_rtp(half);\n"
32245"ushort __ovld __cnfn convert_ushort_rtn(half);\n"
32246"ushort __ovld __cnfn convert_ushort_rtz(half);\n"
32247"ushort __ovld __cnfn convert_ushort_sat(half);\n"
32248"ushort __ovld __cnfn convert_ushort_sat_rte(half);\n"
32249"ushort __ovld __cnfn convert_ushort_sat_rtp(half);\n"
32250"ushort __ovld __cnfn convert_ushort_sat_rtn(half);\n"
32251"ushort __ovld __cnfn convert_ushort_sat_rtz(half);\n"
32252"ushort2 __ovld __cnfn convert_ushort2(half2);\n"
32253"ushort2 __ovld __cnfn convert_ushort2_rte(half2);\n"
32254"ushort2 __ovld __cnfn convert_ushort2_rtp(half2);\n"
32255"ushort2 __ovld __cnfn convert_ushort2_rtn(half2);\n"
32256"ushort2 __ovld __cnfn convert_ushort2_rtz(half2);\n"
32257"ushort2 __ovld __cnfn convert_ushort2_sat(half2);\n"
32258"ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);\n"
32259"ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);\n"
32260"ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);\n"
32261"ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);\n"
32262"ushort3 __ovld __cnfn convert_ushort3(half3);\n"
32263"ushort3 __ovld __cnfn convert_ushort3_rte(half3);\n"
32264"ushort3 __ovld __cnfn convert_ushort3_rtp(half3);\n"
32265"ushort3 __ovld __cnfn convert_ushort3_rtn(half3);\n"
32266"ushort3 __ovld __cnfn convert_ushort3_rtz(half3);\n"
32267"ushort3 __ovld __cnfn convert_ushort3_sat(half3);\n"
32268"ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);\n"
32269"ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);\n"
32270"ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);\n"
32271"ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);\n"
32272"ushort4 __ovld __cnfn convert_ushort4(half4);\n"
32273"ushort4 __ovld __cnfn convert_ushort4_rte(half4);\n"
32274"ushort4 __ovld __cnfn convert_ushort4_rtp(half4);\n"
32275"ushort4 __ovld __cnfn convert_ushort4_rtn(half4);\n"
32276"ushort4 __ovld __cnfn convert_ushort4_rtz(half4);\n"
32277"ushort4 __ovld __cnfn convert_ushort4_sat(half4);\n"
32278"ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);\n"
32279"ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);\n"
32280"ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);\n"
32281"ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);\n"
32282"ushort8 __ovld __cnfn convert_ushort8(half8);\n"
32283"ushort8 __ovld __cnfn convert_ushort8_rte(half8);\n"
32284"ushort8 __ovld __cnfn convert_ushort8_rtp(half8);\n"
32285"ushort8 __ovld __cnfn convert_ushort8_rtn(half8);\n"
32286"ushort8 __ovld __cnfn convert_ushort8_rtz(half8);\n"
32287"ushort8 __ovld __cnfn convert_ushort8_sat(half8);\n"
32288"ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);\n"
32289"ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);\n"
32290"ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);\n"
32291"ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);\n"
32292"ushort16 __ovld __cnfn convert_ushort16(half16);\n"
32293"ushort16 __ovld __cnfn convert_ushort16_rte(half16);\n"
32294"ushort16 __ovld __cnfn convert_ushort16_rtp(half16);\n"
32295"ushort16 __ovld __cnfn convert_ushort16_rtn(half16);\n"
32296"ushort16 __ovld __cnfn convert_ushort16_rtz(half16);\n"
32297"ushort16 __ovld __cnfn convert_ushort16_sat(half16);\n"
32298"ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);\n"
32299"ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);\n"
32300"ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);\n"
32301"ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);\n"
32302"uint __ovld __cnfn convert_uint(half);\n"
32303"uint __ovld __cnfn convert_uint_rte(half);\n"
32304"uint __ovld __cnfn convert_uint_rtp(half);\n"
32305"uint __ovld __cnfn convert_uint_rtn(half);\n"
32306"uint __ovld __cnfn convert_uint_rtz(half);\n"
32307"uint __ovld __cnfn convert_uint_sat(half);\n"
32308"uint __ovld __cnfn convert_uint_sat_rte(half);\n"
32309"uint __ovld __cnfn convert_uint_sat_rtp(half);\n"
32310"uint __ovld __cnfn convert_uint_sat_rtn(half);\n"
32311"uint __ovld __cnfn convert_uint_sat_rtz(half);\n"
32312"uint2 __ovld __cnfn convert_uint2(half2);\n"
32313"uint2 __ovld __cnfn convert_uint2_rte(half2);\n"
32314"uint2 __ovld __cnfn convert_uint2_rtp(half2);\n"
32315"uint2 __ovld __cnfn convert_uint2_rtn(half2);\n"
32316"uint2 __ovld __cnfn convert_uint2_rtz(half2);\n"
32317"uint2 __ovld __cnfn convert_uint2_sat(half2);\n"
32318"uint2 __ovld __cnfn convert_uint2_sat_rte(half2);\n"
32319"uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);\n"
32320"uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);\n"
32321"uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);\n"
32322"uint3 __ovld __cnfn convert_uint3(half3);\n"
32323"uint3 __ovld __cnfn convert_uint3_rte(half3);\n"
32324"uint3 __ovld __cnfn convert_uint3_rtp(half3);\n"
32325"uint3 __ovld __cnfn convert_uint3_rtn(half3);\n"
32326"uint3 __ovld __cnfn convert_uint3_rtz(half3);\n"
32327"uint3 __ovld __cnfn convert_uint3_sat(half3);\n"
32328"uint3 __ovld __cnfn convert_uint3_sat_rte(half3);\n"
32329"uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);\n"
32330"uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);\n"
32331"uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);\n"
32332"uint4 __ovld __cnfn convert_uint4(half4);\n"
32333"uint4 __ovld __cnfn convert_uint4_rte(half4);\n"
32334"uint4 __ovld __cnfn convert_uint4_rtp(half4);\n"
32335"uint4 __ovld __cnfn convert_uint4_rtn(half4);\n"
32336"uint4 __ovld __cnfn convert_uint4_rtz(half4);\n"
32337"uint4 __ovld __cnfn convert_uint4_sat(half4);\n"
32338"uint4 __ovld __cnfn convert_uint4_sat_rte(half4);\n"
32339"uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);\n"
32340"uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);\n"
32341"uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);\n"
32342"uint8 __ovld __cnfn convert_uint8(half8);\n"
32343"uint8 __ovld __cnfn convert_uint8_rte(half8);\n"
32344"uint8 __ovld __cnfn convert_uint8_rtp(half8);\n"
32345"uint8 __ovld __cnfn convert_uint8_rtn(half8);\n"
32346"uint8 __ovld __cnfn convert_uint8_rtz(half8);\n"
32347"uint8 __ovld __cnfn convert_uint8_sat(half8);\n"
32348"uint8 __ovld __cnfn convert_uint8_sat_rte(half8);\n"
32349"uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);\n"
32350"uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);\n"
32351"uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);\n"
32352"uint16 __ovld __cnfn convert_uint16(half16);\n"
32353"uint16 __ovld __cnfn convert_uint16_rte(half16);\n"
32354"uint16 __ovld __cnfn convert_uint16_rtp(half16);\n"
32355"uint16 __ovld __cnfn convert_uint16_rtn(half16);\n"
32356"uint16 __ovld __cnfn convert_uint16_rtz(half16);\n"
32357"uint16 __ovld __cnfn convert_uint16_sat(half16);\n"
32358"uint16 __ovld __cnfn convert_uint16_sat_rte(half16);\n"
32359"uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);\n"
32360"uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);\n"
32361"uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);\n"
32362"ulong __ovld __cnfn convert_ulong(half);\n"
32363"ulong __ovld __cnfn convert_ulong_rte(half);\n"
32364"ulong __ovld __cnfn convert_ulong_rtp(half);\n"
32365"ulong __ovld __cnfn convert_ulong_rtn(half);\n"
32366"ulong __ovld __cnfn convert_ulong_rtz(half);\n"
32367"ulong __ovld __cnfn convert_ulong_sat(half);\n"
32368"ulong __ovld __cnfn convert_ulong_sat_rte(half);\n"
32369"ulong __ovld __cnfn convert_ulong_sat_rtp(half);\n"
32370"ulong __ovld __cnfn convert_ulong_sat_rtn(half);\n"
32371"ulong __ovld __cnfn convert_ulong_sat_rtz(half);\n"
32372"ulong2 __ovld __cnfn convert_ulong2(half2);\n"
32373"ulong2 __ovld __cnfn convert_ulong2_rte(half2);\n"
32374"ulong2 __ovld __cnfn convert_ulong2_rtp(half2);\n"
32375"ulong2 __ovld __cnfn convert_ulong2_rtn(half2);\n"
32376"ulong2 __ovld __cnfn convert_ulong2_rtz(half2);\n"
32377"ulong2 __ovld __cnfn convert_ulong2_sat(half2);\n"
32378"ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);\n"
32379"ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);\n"
32380"ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);\n"
32381"ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);\n"
32382"ulong3 __ovld __cnfn convert_ulong3(half3);\n"
32383"ulong3 __ovld __cnfn convert_ulong3_rte(half3);\n"
32384"ulong3 __ovld __cnfn convert_ulong3_rtp(half3);\n"
32385"ulong3 __ovld __cnfn convert_ulong3_rtn(half3);\n"
32386"ulong3 __ovld __cnfn convert_ulong3_rtz(half3);\n"
32387"ulong3 __ovld __cnfn convert_ulong3_sat(half3);\n"
32388"ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);\n"
32389"ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);\n"
32390"ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);\n"
32391"ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);\n"
32392"ulong4 __ovld __cnfn convert_ulong4(half4);\n"
32393"ulong4 __ovld __cnfn convert_ulong4_rte(half4);\n"
32394"ulong4 __ovld __cnfn convert_ulong4_rtp(half4);\n"
32395"ulong4 __ovld __cnfn convert_ulong4_rtn(half4);\n"
32396"ulong4 __ovld __cnfn convert_ulong4_rtz(half4);\n"
32397"ulong4 __ovld __cnfn convert_ulong4_sat(half4);\n"
32398"ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);\n"
32399"ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);\n"
32400"ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);\n"
32401"ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);\n"
32402"ulong8 __ovld __cnfn convert_ulong8(half8);\n"
32403"ulong8 __ovld __cnfn convert_ulong8_rte(half8);\n"
32404"ulong8 __ovld __cnfn convert_ulong8_rtp(half8);\n"
32405"ulong8 __ovld __cnfn convert_ulong8_rtn(half8);\n"
32406"ulong8 __ovld __cnfn convert_ulong8_rtz(half8);\n"
32407"ulong8 __ovld __cnfn convert_ulong8_sat(half8);\n"
32408"ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);\n"
32409"ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);\n"
32410"ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);\n"
32411"ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);\n"
32412"ulong16 __ovld __cnfn convert_ulong16(half16);\n"
32413"ulong16 __ovld __cnfn convert_ulong16_rte(half16);\n"
32414"ulong16 __ovld __cnfn convert_ulong16_rtp(half16);\n"
32415"ulong16 __ovld __cnfn convert_ulong16_rtn(half16);\n"
32416"ulong16 __ovld __cnfn convert_ulong16_rtz(half16);\n"
32417"ulong16 __ovld __cnfn convert_ulong16_sat(half16);\n"
32418"ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);\n"
32419"ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);\n"
32420"ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);\n"
32421"ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);\n"
32422"char __ovld __cnfn convert_char(half);\n"
32423"char __ovld __cnfn convert_char_rte(half);\n"
32424"char __ovld __cnfn convert_char_rtp(half);\n"
32425"char __ovld __cnfn convert_char_rtn(half);\n"
32426"char __ovld __cnfn convert_char_rtz(half);\n"
32427"char __ovld __cnfn convert_char_sat(half);\n"
32428"char __ovld __cnfn convert_char_sat_rte(half);\n"
32429"char __ovld __cnfn convert_char_sat_rtp(half);\n"
32430"char __ovld __cnfn convert_char_sat_rtn(half);\n"
32431"char __ovld __cnfn convert_char_sat_rtz(half);\n"
32432"char2 __ovld __cnfn convert_char2(half2);\n"
32433"char2 __ovld __cnfn convert_char2_rte(half2);\n"
32434"char2 __ovld __cnfn convert_char2_rtp(half2);\n"
32435"char2 __ovld __cnfn convert_char2_rtn(half2);\n"
32436"char2 __ovld __cnfn convert_char2_rtz(half2);\n"
32437"char2 __ovld __cnfn convert_char2_sat(half2);\n"
32438"char2 __ovld __cnfn convert_char2_sat_rte(half2);\n"
32439"char2 __ovld __cnfn convert_char2_sat_rtp(half2);\n"
32440"char2 __ovld __cnfn convert_char2_sat_rtn(half2);\n"
32441"char2 __ovld __cnfn convert_char2_sat_rtz(half2);\n"
32442"char3 __ovld __cnfn convert_char3(half3);\n"
32443"char3 __ovld __cnfn convert_char3_rte(half3);\n"
32444"char3 __ovld __cnfn convert_char3_rtp(half3);\n"
32445"char3 __ovld __cnfn convert_char3_rtn(half3);\n"
32446"char3 __ovld __cnfn convert_char3_rtz(half3);\n"
32447"char3 __ovld __cnfn convert_char3_sat(half3);\n"
32448"char3 __ovld __cnfn convert_char3_sat_rte(half3);\n"
32449"char3 __ovld __cnfn convert_char3_sat_rtp(half3);\n"
32450"char3 __ovld __cnfn convert_char3_sat_rtn(half3);\n"
32451"char3 __ovld __cnfn convert_char3_sat_rtz(half3);\n"
32452"char4 __ovld __cnfn convert_char4(half4);\n"
32453"char4 __ovld __cnfn convert_char4_rte(half4);\n"
32454"char4 __ovld __cnfn convert_char4_rtp(half4);\n"
32455"char4 __ovld __cnfn convert_char4_rtn(half4);\n"
32456"char4 __ovld __cnfn convert_char4_rtz(half4);\n"
32457"char4 __ovld __cnfn convert_char4_sat(half4);\n"
32458"char4 __ovld __cnfn convert_char4_sat_rte(half4);\n"
32459"char4 __ovld __cnfn convert_char4_sat_rtp(half4);\n"
32460"char4 __ovld __cnfn convert_char4_sat_rtn(half4);\n"
32461"char4 __ovld __cnfn convert_char4_sat_rtz(half4);\n"
32462"char8 __ovld __cnfn convert_char8(half8);\n"
32463"char8 __ovld __cnfn convert_char8_rte(half8);\n"
32464"char8 __ovld __cnfn convert_char8_rtp(half8);\n"
32465"char8 __ovld __cnfn convert_char8_rtn(half8);\n"
32466"char8 __ovld __cnfn convert_char8_rtz(half8);\n"
32467"char8 __ovld __cnfn convert_char8_sat(half8);\n"
32468"char8 __ovld __cnfn convert_char8_sat_rte(half8);\n"
32469"char8 __ovld __cnfn convert_char8_sat_rtp(half8);\n"
32470"char8 __ovld __cnfn convert_char8_sat_rtn(half8);\n"
32471"char8 __ovld __cnfn convert_char8_sat_rtz(half8);\n"
32472"char16 __ovld __cnfn convert_char16(half16);\n"
32473"char16 __ovld __cnfn convert_char16_rte(half16);\n"
32474"char16 __ovld __cnfn convert_char16_rtp(half16);\n"
32475"char16 __ovld __cnfn convert_char16_rtn(half16);\n"
32476"char16 __ovld __cnfn convert_char16_rtz(half16);\n"
32477"char16 __ovld __cnfn convert_char16_sat(half16);\n"
32478"char16 __ovld __cnfn convert_char16_sat_rte(half16);\n"
32479"char16 __ovld __cnfn convert_char16_sat_rtp(half16);\n"
32480"char16 __ovld __cnfn convert_char16_sat_rtn(half16);\n"
32481"char16 __ovld __cnfn convert_char16_sat_rtz(half16);\n"
32482"short __ovld __cnfn convert_short(half);\n"
32483"short __ovld __cnfn convert_short_rte(half);\n"
32484"short __ovld __cnfn convert_short_rtp(half);\n"
32485"short __ovld __cnfn convert_short_rtn(half);\n"
32486"short __ovld __cnfn convert_short_rtz(half);\n"
32487"short __ovld __cnfn convert_short_sat(half);\n"
32488"short __ovld __cnfn convert_short_sat_rte(half);\n"
32489"short __ovld __cnfn convert_short_sat_rtp(half);\n"
32490"short __ovld __cnfn convert_short_sat_rtn(half);\n"
32491"short __ovld __cnfn convert_short_sat_rtz(half);\n"
32492"short2 __ovld __cnfn convert_short2(half2);\n"
32493"short2 __ovld __cnfn convert_short2_rte(half2);\n"
32494"short2 __ovld __cnfn convert_short2_rtp(half2);\n"
32495"short2 __ovld __cnfn convert_short2_rtn(half2);\n"
32496"short2 __ovld __cnfn convert_short2_rtz(half2);\n"
32497"short2 __ovld __cnfn convert_short2_sat(half2);\n"
32498"short2 __ovld __cnfn convert_short2_sat_rte(half2);\n"
32499"short2 __ovld __cnfn convert_short2_sat_rtp(half2);\n"
32500"short2 __ovld __cnfn convert_short2_sat_rtn(half2);\n"
32501"short2 __ovld __cnfn convert_short2_sat_rtz(half2);\n"
32502"short3 __ovld __cnfn convert_short3(half3);\n"
32503"short3 __ovld __cnfn convert_short3_rte(half3);\n"
32504"short3 __ovld __cnfn convert_short3_rtp(half3);\n"
32505"short3 __ovld __cnfn convert_short3_rtn(half3);\n"
32506"short3 __ovld __cnfn convert_short3_rtz(half3);\n"
32507"short3 __ovld __cnfn convert_short3_sat(half3);\n"
32508"short3 __ovld __cnfn convert_short3_sat_rte(half3);\n"
32509"short3 __ovld __cnfn convert_short3_sat_rtp(half3);\n"
32510"short3 __ovld __cnfn convert_short3_sat_rtn(half3);\n"
32511"short3 __ovld __cnfn convert_short3_sat_rtz(half3);\n"
32512"short4 __ovld __cnfn convert_short4(half4);\n"
32513"short4 __ovld __cnfn convert_short4_rte(half4);\n"
32514"short4 __ovld __cnfn convert_short4_rtp(half4);\n"
32515"short4 __ovld __cnfn convert_short4_rtn(half4);\n"
32516"short4 __ovld __cnfn convert_short4_rtz(half4);\n"
32517"short4 __ovld __cnfn convert_short4_sat(half4);\n"
32518"short4 __ovld __cnfn convert_short4_sat_rte(half4);\n"
32519"short4 __ovld __cnfn convert_short4_sat_rtp(half4);\n"
32520"short4 __ovld __cnfn convert_short4_sat_rtn(half4);\n"
32521"short4 __ovld __cnfn convert_short4_sat_rtz(half4);\n"
32522"short8 __ovld __cnfn convert_short8(half8);\n"
32523"short8 __ovld __cnfn convert_short8_rte(half8);\n"
32524"short8 __ovld __cnfn convert_short8_rtp(half8);\n"
32525"short8 __ovld __cnfn convert_short8_rtn(half8);\n"
32526"short8 __ovld __cnfn convert_short8_rtz(half8);\n"
32527"short8 __ovld __cnfn convert_short8_sat(half8);\n"
32528"short8 __ovld __cnfn convert_short8_sat_rte(half8);\n"
32529"short8 __ovld __cnfn convert_short8_sat_rtp(half8);\n"
32530"short8 __ovld __cnfn convert_short8_sat_rtn(half8);\n"
32531"short8 __ovld __cnfn convert_short8_sat_rtz(half8);\n"
32532"short16 __ovld __cnfn convert_short16(half16);\n"
32533"short16 __ovld __cnfn convert_short16_rte(half16);\n"
32534"short16 __ovld __cnfn convert_short16_rtp(half16);\n"
32535"short16 __ovld __cnfn convert_short16_rtn(half16);\n"
32536"short16 __ovld __cnfn convert_short16_rtz(half16);\n"
32537"short16 __ovld __cnfn convert_short16_sat(half16);\n"
32538"short16 __ovld __cnfn convert_short16_sat_rte(half16);\n"
32539"short16 __ovld __cnfn convert_short16_sat_rtp(half16);\n"
32540"short16 __ovld __cnfn convert_short16_sat_rtn(half16);\n"
32541"short16 __ovld __cnfn convert_short16_sat_rtz(half16);\n"
32542"int __ovld __cnfn convert_int(half);\n"
32543"int __ovld __cnfn convert_int_rte(half);\n"
32544"int __ovld __cnfn convert_int_rtp(half);\n"
32545"int __ovld __cnfn convert_int_rtn(half);\n"
32546"int __ovld __cnfn convert_int_rtz(half);\n"
32547"int __ovld __cnfn convert_int_sat(half);\n"
32548"int __ovld __cnfn convert_int_sat_rte(half);\n"
32549"int __ovld __cnfn convert_int_sat_rtp(half);\n"
32550"int __ovld __cnfn convert_int_sat_rtn(half);\n"
32551"int __ovld __cnfn convert_int_sat_rtz(half);\n"
32552"int2 __ovld __cnfn convert_int2(half2);\n"
32553"int2 __ovld __cnfn convert_int2_rte(half2);\n"
32554"int2 __ovld __cnfn convert_int2_rtp(half2);\n"
32555"int2 __ovld __cnfn convert_int2_rtn(half2);\n"
32556"int2 __ovld __cnfn convert_int2_rtz(half2);\n"
32557"int2 __ovld __cnfn convert_int2_sat(half2);\n"
32558"int2 __ovld __cnfn convert_int2_sat_rte(half2);\n"
32559"int2 __ovld __cnfn convert_int2_sat_rtp(half2);\n"
32560"int2 __ovld __cnfn convert_int2_sat_rtn(half2);\n"
32561"int2 __ovld __cnfn convert_int2_sat_rtz(half2);\n"
32562"int3 __ovld __cnfn convert_int3(half3);\n"
32563"int3 __ovld __cnfn convert_int3_rte(half3);\n"
32564"int3 __ovld __cnfn convert_int3_rtp(half3);\n"
32565"int3 __ovld __cnfn convert_int3_rtn(half3);\n"
32566"int3 __ovld __cnfn convert_int3_rtz(half3);\n"
32567"int3 __ovld __cnfn convert_int3_sat(half3);\n"
32568"int3 __ovld __cnfn convert_int3_sat_rte(half3);\n"
32569"int3 __ovld __cnfn convert_int3_sat_rtp(half3);\n"
32570"int3 __ovld __cnfn convert_int3_sat_rtn(half3);\n"
32571"int3 __ovld __cnfn convert_int3_sat_rtz(half3);\n"
32572"int4 __ovld __cnfn convert_int4(half4);\n"
32573"int4 __ovld __cnfn convert_int4_rte(half4);\n"
32574"int4 __ovld __cnfn convert_int4_rtp(half4);\n"
32575"int4 __ovld __cnfn convert_int4_rtn(half4);\n"
32576"int4 __ovld __cnfn convert_int4_rtz(half4);\n"
32577"int4 __ovld __cnfn convert_int4_sat(half4);\n"
32578"int4 __ovld __cnfn convert_int4_sat_rte(half4);\n"
32579"int4 __ovld __cnfn convert_int4_sat_rtp(half4);\n"
32580"int4 __ovld __cnfn convert_int4_sat_rtn(half4);\n"
32581"int4 __ovld __cnfn convert_int4_sat_rtz(half4);\n"
32582"int8 __ovld __cnfn convert_int8(half8);\n"
32583"int8 __ovld __cnfn convert_int8_rte(half8);\n"
32584"int8 __ovld __cnfn convert_int8_rtp(half8);\n"
32585"int8 __ovld __cnfn convert_int8_rtn(half8);\n"
32586"int8 __ovld __cnfn convert_int8_rtz(half8);\n"
32587"int8 __ovld __cnfn convert_int8_sat(half8);\n"
32588"int8 __ovld __cnfn convert_int8_sat_rte(half8);\n"
32589"int8 __ovld __cnfn convert_int8_sat_rtp(half8);\n"
32590"int8 __ovld __cnfn convert_int8_sat_rtn(half8);\n"
32591"int8 __ovld __cnfn convert_int8_sat_rtz(half8);\n"
32592"int16 __ovld __cnfn convert_int16(half16);\n"
32593"int16 __ovld __cnfn convert_int16_rte(half16);\n"
32594"int16 __ovld __cnfn convert_int16_rtp(half16);\n"
32595"int16 __ovld __cnfn convert_int16_rtn(half16);\n"
32596"int16 __ovld __cnfn convert_int16_rtz(half16);\n"
32597"int16 __ovld __cnfn convert_int16_sat(half16);\n"
32598"int16 __ovld __cnfn convert_int16_sat_rte(half16);\n"
32599"int16 __ovld __cnfn convert_int16_sat_rtp(half16);\n"
32600"int16 __ovld __cnfn convert_int16_sat_rtn(half16);\n"
32601"int16 __ovld __cnfn convert_int16_sat_rtz(half16);\n"
32602"long __ovld __cnfn convert_long(half);\n"
32603"long __ovld __cnfn convert_long_rte(half);\n"
32604"long __ovld __cnfn convert_long_rtp(half);\n"
32605"long __ovld __cnfn convert_long_rtn(half);\n"
32606"long __ovld __cnfn convert_long_rtz(half);\n"
32607"long __ovld __cnfn convert_long_sat(half);\n"
32608"long __ovld __cnfn convert_long_sat_rte(half);\n"
32609"long __ovld __cnfn convert_long_sat_rtp(half);\n"
32610"long __ovld __cnfn convert_long_sat_rtn(half);\n"
32611"long __ovld __cnfn convert_long_sat_rtz(half);\n"
32612"long2 __ovld __cnfn convert_long2(half2);\n"
32613"long2 __ovld __cnfn convert_long2_rte(half2);\n"
32614"long2 __ovld __cnfn convert_long2_rtp(half2);\n"
32615"long2 __ovld __cnfn convert_long2_rtn(half2);\n"
32616"long2 __ovld __cnfn convert_long2_rtz(half2);\n"
32617"long2 __ovld __cnfn convert_long2_sat(half2);\n"
32618"long2 __ovld __cnfn convert_long2_sat_rte(half2);\n"
32619"long2 __ovld __cnfn convert_long2_sat_rtp(half2);\n"
32620"long2 __ovld __cnfn convert_long2_sat_rtn(half2);\n"
32621"long2 __ovld __cnfn convert_long2_sat_rtz(half2);\n"
32622"long3 __ovld __cnfn convert_long3(half3);\n"
32623"long3 __ovld __cnfn convert_long3_rte(half3);\n"
32624"long3 __ovld __cnfn convert_long3_rtp(half3);\n"
32625"long3 __ovld __cnfn convert_long3_rtn(half3);\n"
32626"long3 __ovld __cnfn convert_long3_rtz(half3);\n"
32627"long3 __ovld __cnfn convert_long3_sat(half3);\n"
32628"long3 __ovld __cnfn convert_long3_sat_rte(half3);\n"
32629"long3 __ovld __cnfn convert_long3_sat_rtp(half3);\n"
32630"long3 __ovld __cnfn convert_long3_sat_rtn(half3);\n"
32631"long3 __ovld __cnfn convert_long3_sat_rtz(half3);\n"
32632"long4 __ovld __cnfn convert_long4(half4);\n"
32633"long4 __ovld __cnfn convert_long4_rte(half4);\n"
32634"long4 __ovld __cnfn convert_long4_rtp(half4);\n"
32635"long4 __ovld __cnfn convert_long4_rtn(half4);\n"
32636"long4 __ovld __cnfn convert_long4_rtz(half4);\n"
32637"long4 __ovld __cnfn convert_long4_sat(half4);\n"
32638"long4 __ovld __cnfn convert_long4_sat_rte(half4);\n"
32639"long4 __ovld __cnfn convert_long4_sat_rtp(half4);\n"
32640"long4 __ovld __cnfn convert_long4_sat_rtn(half4);\n"
32641"long4 __ovld __cnfn convert_long4_sat_rtz(half4);\n"
32642"long8 __ovld __cnfn convert_long8(half8);\n"
32643"long8 __ovld __cnfn convert_long8_rte(half8);\n"
32644"long8 __ovld __cnfn convert_long8_rtp(half8);\n"
32645"long8 __ovld __cnfn convert_long8_rtn(half8);\n"
32646"long8 __ovld __cnfn convert_long8_rtz(half8);\n"
32647"long8 __ovld __cnfn convert_long8_sat(half8);\n"
32648"long8 __ovld __cnfn convert_long8_sat_rte(half8);\n"
32649"long8 __ovld __cnfn convert_long8_sat_rtp(half8);\n"
32650"long8 __ovld __cnfn convert_long8_sat_rtn(half8);\n"
32651"long8 __ovld __cnfn convert_long8_sat_rtz(half8);\n"
32652"long16 __ovld __cnfn convert_long16(half16);\n"
32653"long16 __ovld __cnfn convert_long16_rte(half16);\n"
32654"long16 __ovld __cnfn convert_long16_rtp(half16);\n"
32655"long16 __ovld __cnfn convert_long16_rtn(half16);\n"
32656"long16 __ovld __cnfn convert_long16_rtz(half16);\n"
32657"long16 __ovld __cnfn convert_long16_sat(half16);\n"
32658"long16 __ovld __cnfn convert_long16_sat_rte(half16);\n"
32659"long16 __ovld __cnfn convert_long16_sat_rtp(half16);\n"
32660"long16 __ovld __cnfn convert_long16_sat_rtn(half16);\n"
32661"long16 __ovld __cnfn convert_long16_sat_rtz(half16);\n"
32662"float __ovld __cnfn convert_float(half);\n"
32663"float __ovld __cnfn convert_float_rte(half);\n"
32664"float __ovld __cnfn convert_float_rtp(half);\n"
32665"float __ovld __cnfn convert_float_rtn(half);\n"
32666"float __ovld __cnfn convert_float_rtz(half);\n"
32667"float2 __ovld __cnfn convert_float2(half2);\n"
32668"float2 __ovld __cnfn convert_float2_rte(half2);\n"
32669"float2 __ovld __cnfn convert_float2_rtp(half2);\n"
32670"float2 __ovld __cnfn convert_float2_rtn(half2);\n"
32671"float2 __ovld __cnfn convert_float2_rtz(half2);\n"
32672"float3 __ovld __cnfn convert_float3(half3);\n"
32673"float3 __ovld __cnfn convert_float3_rte(half3);\n"
32674"float3 __ovld __cnfn convert_float3_rtp(half3);\n"
32675"float3 __ovld __cnfn convert_float3_rtn(half3);\n"
32676"float3 __ovld __cnfn convert_float3_rtz(half3);\n"
32677"float4 __ovld __cnfn convert_float4(half4);\n"
32678"float4 __ovld __cnfn convert_float4_rte(half4);\n"
32679"float4 __ovld __cnfn convert_float4_rtp(half4);\n"
32680"float4 __ovld __cnfn convert_float4_rtn(half4);\n"
32681"float4 __ovld __cnfn convert_float4_rtz(half4);\n"
32682"float8 __ovld __cnfn convert_float8(half8);\n"
32683"float8 __ovld __cnfn convert_float8_rte(half8);\n"
32684"float8 __ovld __cnfn convert_float8_rtp(half8);\n"
32685"float8 __ovld __cnfn convert_float8_rtn(half8);\n"
32686"float8 __ovld __cnfn convert_float8_rtz(half8);\n"
32687"float16 __ovld __cnfn convert_float16(half16);\n"
32688"float16 __ovld __cnfn convert_float16_rte(half16);\n"
32689"float16 __ovld __cnfn convert_float16_rtp(half16);\n"
32690"float16 __ovld __cnfn convert_float16_rtn(half16);\n"
32691"float16 __ovld __cnfn convert_float16_rtz(half16);\n"
32692"\n"
32693"// Convert non-double types to half types.\n"
32694"half __ovld __cnfn convert_half(uchar);\n"
32695"half __ovld __cnfn convert_half(ushort);\n"
32696"half __ovld __cnfn convert_half(uint);\n"
32697"half __ovld __cnfn convert_half(ulong);\n"
32698"half __ovld __cnfn convert_half(char);\n"
32699"half __ovld __cnfn convert_half(short);\n"
32700"half __ovld __cnfn convert_half(int);\n"
32701"half __ovld __cnfn convert_half(long);\n"
32702"half __ovld __cnfn convert_half(float);\n"
32703"half __ovld __cnfn convert_half(half);\n"
32704"half __ovld __cnfn convert_half_rte(uchar);\n"
32705"half __ovld __cnfn convert_half_rte(ushort);\n"
32706"half __ovld __cnfn convert_half_rte(uint);\n"
32707"half __ovld __cnfn convert_half_rte(ulong);\n"
32708"half __ovld __cnfn convert_half_rte(char);\n"
32709"half __ovld __cnfn convert_half_rte(short);\n"
32710"half __ovld __cnfn convert_half_rte(int);\n"
32711"half __ovld __cnfn convert_half_rte(long);\n"
32712"half __ovld __cnfn convert_half_rte(float);\n"
32713"half __ovld __cnfn convert_half_rte(half);\n"
32714"half __ovld __cnfn convert_half_rtp(uchar);\n"
32715"half __ovld __cnfn convert_half_rtp(ushort);\n"
32716"half __ovld __cnfn convert_half_rtp(uint);\n"
32717"half __ovld __cnfn convert_half_rtp(ulong);\n"
32718"half __ovld __cnfn convert_half_rtp(char);\n"
32719"half __ovld __cnfn convert_half_rtp(short);\n"
32720"half __ovld __cnfn convert_half_rtp(int);\n"
32721"half __ovld __cnfn convert_half_rtp(long);\n"
32722"half __ovld __cnfn convert_half_rtp(float);\n"
32723"half __ovld __cnfn convert_half_rtp(half);\n"
32724"half __ovld __cnfn convert_half_rtn(uchar);\n"
32725"half __ovld __cnfn convert_half_rtn(ushort);\n"
32726"half __ovld __cnfn convert_half_rtn(uint);\n"
32727"half __ovld __cnfn convert_half_rtn(ulong);\n"
32728"half __ovld __cnfn convert_half_rtn(char);\n"
32729"half __ovld __cnfn convert_half_rtn(short);\n"
32730"half __ovld __cnfn convert_half_rtn(int);\n"
32731"half __ovld __cnfn convert_half_rtn(long);\n"
32732"half __ovld __cnfn convert_half_rtn(float);\n"
32733"half __ovld __cnfn convert_half_rtn(half);\n"
32734"half __ovld __cnfn convert_half_rtz(uchar);\n"
32735"half __ovld __cnfn convert_half_rtz(ushort);\n"
32736"half __ovld __cnfn convert_half_rtz(uint);\n"
32737"half __ovld __cnfn convert_half_rtz(ulong);\n"
32738"half __ovld __cnfn convert_half_rtz(char);\n"
32739"half __ovld __cnfn convert_half_rtz(short);\n"
32740"half __ovld __cnfn convert_half_rtz(int);\n"
32741"half __ovld __cnfn convert_half_rtz(long);\n"
32742"half __ovld __cnfn convert_half_rtz(float);\n"
32743"half __ovld __cnfn convert_half_rtz(half);\n"
32744"half2 __ovld __cnfn convert_half2(char2);\n"
32745"half2 __ovld __cnfn convert_half2(uchar2);\n"
32746"half2 __ovld __cnfn convert_half2(short2);\n"
32747"half2 __ovld __cnfn convert_half2(ushort2);\n"
32748"half2 __ovld __cnfn convert_half2(int2);\n"
32749"half2 __ovld __cnfn convert_half2(uint2);\n"
32750"half2 __ovld __cnfn convert_half2(long2);\n"
32751"half2 __ovld __cnfn convert_half2(ulong2);\n"
32752"half2 __ovld __cnfn convert_half2(float2);\n"
32753"half2 __ovld __cnfn convert_half2(half2);\n"
32754"half2 __ovld __cnfn convert_half2_rte(char2);\n"
32755"half2 __ovld __cnfn convert_half2_rte(uchar2);\n"
32756"half2 __ovld __cnfn convert_half2_rte(short2);\n"
32757"half2 __ovld __cnfn convert_half2_rte(ushort2);\n"
32758"half2 __ovld __cnfn convert_half2_rte(int2);\n"
32759"half2 __ovld __cnfn convert_half2_rte(uint2);\n"
32760"half2 __ovld __cnfn convert_half2_rte(long2);\n"
32761"half2 __ovld __cnfn convert_half2_rte(ulong2);\n"
32762"half2 __ovld __cnfn convert_half2_rte(float2);\n"
32763"half2 __ovld __cnfn convert_half2_rte(half2);\n"
32764"half2 __ovld __cnfn convert_half2_rtp(char2);\n"
32765"half2 __ovld __cnfn convert_half2_rtp(uchar2);\n"
32766"half2 __ovld __cnfn convert_half2_rtp(short2);\n"
32767"half2 __ovld __cnfn convert_half2_rtp(ushort2);\n"
32768"half2 __ovld __cnfn convert_half2_rtp(int2);\n"
32769"half2 __ovld __cnfn convert_half2_rtp(uint2);\n"
32770"half2 __ovld __cnfn convert_half2_rtp(long2);\n"
32771"half2 __ovld __cnfn convert_half2_rtp(ulong2);\n"
32772"half2 __ovld __cnfn convert_half2_rtp(float2);\n"
32773"half2 __ovld __cnfn convert_half2_rtp(half2);\n"
32774"half2 __ovld __cnfn convert_half2_rtn(char2);\n"
32775"half2 __ovld __cnfn convert_half2_rtn(uchar2);\n"
32776"half2 __ovld __cnfn convert_half2_rtn(short2);\n"
32777"half2 __ovld __cnfn convert_half2_rtn(ushort2);\n"
32778"half2 __ovld __cnfn convert_half2_rtn(int2);\n"
32779"half2 __ovld __cnfn convert_half2_rtn(uint2);\n"
32780"half2 __ovld __cnfn convert_half2_rtn(long2);\n"
32781"half2 __ovld __cnfn convert_half2_rtn(ulong2);\n"
32782"half2 __ovld __cnfn convert_half2_rtn(float2);\n"
32783"half2 __ovld __cnfn convert_half2_rtn(half2);\n"
32784"half2 __ovld __cnfn convert_half2_rtz(char2);\n"
32785"half2 __ovld __cnfn convert_half2_rtz(uchar2);\n"
32786"half2 __ovld __cnfn convert_half2_rtz(short2);\n"
32787"half2 __ovld __cnfn convert_half2_rtz(ushort2);\n"
32788"half2 __ovld __cnfn convert_half2_rtz(int2);\n"
32789"half2 __ovld __cnfn convert_half2_rtz(uint2);\n"
32790"half2 __ovld __cnfn convert_half2_rtz(long2);\n"
32791"half2 __ovld __cnfn convert_half2_rtz(ulong2);\n"
32792"half2 __ovld __cnfn convert_half2_rtz(float2);\n"
32793"half2 __ovld __cnfn convert_half2_rtz(half2);\n"
32794"half3 __ovld __cnfn convert_half3(char3);\n"
32795"half3 __ovld __cnfn convert_half3(uchar3);\n"
32796"half3 __ovld __cnfn convert_half3(short3);\n"
32797"half3 __ovld __cnfn convert_half3(ushort3);\n"
32798"half3 __ovld __cnfn convert_half3(int3);\n"
32799"half3 __ovld __cnfn convert_half3(uint3);\n"
32800"half3 __ovld __cnfn convert_half3(long3);\n"
32801"half3 __ovld __cnfn convert_half3(ulong3);\n"
32802"half3 __ovld __cnfn convert_half3(float3);\n"
32803"half3 __ovld __cnfn convert_half3(half3);\n"
32804"half3 __ovld __cnfn convert_half3_rte(char3);\n"
32805"half3 __ovld __cnfn convert_half3_rte(uchar3);\n"
32806"half3 __ovld __cnfn convert_half3_rte(short3);\n"
32807"half3 __ovld __cnfn convert_half3_rte(ushort3);\n"
32808"half3 __ovld __cnfn convert_half3_rte(int3);\n"
32809"half3 __ovld __cnfn convert_half3_rte(uint3);\n"
32810"half3 __ovld __cnfn convert_half3_rte(long3);\n"
32811"half3 __ovld __cnfn convert_half3_rte(ulong3);\n"
32812"half3 __ovld __cnfn convert_half3_rte(float3);\n"
32813"half3 __ovld __cnfn convert_half3_rte(half3);\n"
32814"half3 __ovld __cnfn convert_half3_rtp(char3);\n"
32815"half3 __ovld __cnfn convert_half3_rtp(uchar3);\n"
32816"half3 __ovld __cnfn convert_half3_rtp(short3);\n"
32817"half3 __ovld __cnfn convert_half3_rtp(ushort3);\n"
32818"half3 __ovld __cnfn convert_half3_rtp(int3);\n"
32819"half3 __ovld __cnfn convert_half3_rtp(uint3);\n"
32820"half3 __ovld __cnfn convert_half3_rtp(long3);\n"
32821"half3 __ovld __cnfn convert_half3_rtp(ulong3);\n"
32822"half3 __ovld __cnfn convert_half3_rtp(float3);\n"
32823"half3 __ovld __cnfn convert_half3_rtp(half3);\n"
32824"half3 __ovld __cnfn convert_half3_rtn(char3);\n"
32825"half3 __ovld __cnfn convert_half3_rtn(uchar3);\n"
32826"half3 __ovld __cnfn convert_half3_rtn(short3);\n"
32827"half3 __ovld __cnfn convert_half3_rtn(ushort3);\n"
32828"half3 __ovld __cnfn convert_half3_rtn(int3);\n"
32829"half3 __ovld __cnfn convert_half3_rtn(uint3);\n"
32830"half3 __ovld __cnfn convert_half3_rtn(long3);\n"
32831"half3 __ovld __cnfn convert_half3_rtn(ulong3);\n"
32832"half3 __ovld __cnfn convert_half3_rtn(float3);\n"
32833"half3 __ovld __cnfn convert_half3_rtn(half3);\n"
32834"half3 __ovld __cnfn convert_half3_rtz(char3);\n"
32835"half3 __ovld __cnfn convert_half3_rtz(uchar3);\n"
32836"half3 __ovld __cnfn convert_half3_rtz(short3);\n"
32837"half3 __ovld __cnfn convert_half3_rtz(ushort3);\n"
32838"half3 __ovld __cnfn convert_half3_rtz(int3);\n"
32839"half3 __ovld __cnfn convert_half3_rtz(uint3);\n"
32840"half3 __ovld __cnfn convert_half3_rtz(long3);\n"
32841"half3 __ovld __cnfn convert_half3_rtz(ulong3);\n"
32842"half3 __ovld __cnfn convert_half3_rtz(float3);\n"
32843"half3 __ovld __cnfn convert_half3_rtz(half3);\n"
32844"half4 __ovld __cnfn convert_half4(char4);\n"
32845"half4 __ovld __cnfn convert_half4(uchar4);\n"
32846"half4 __ovld __cnfn convert_half4(short4);\n"
32847"half4 __ovld __cnfn convert_half4(ushort4);\n"
32848"half4 __ovld __cnfn convert_half4(int4);\n"
32849"half4 __ovld __cnfn convert_half4(uint4);\n"
32850"half4 __ovld __cnfn convert_half4(long4);\n"
32851"half4 __ovld __cnfn convert_half4(ulong4);\n"
32852"half4 __ovld __cnfn convert_half4(float4);\n"
32853"half4 __ovld __cnfn convert_half4(half4);\n"
32854"half4 __ovld __cnfn convert_half4_rte(char4);\n"
32855"half4 __ovld __cnfn convert_half4_rte(uchar4);\n"
32856"half4 __ovld __cnfn convert_half4_rte(short4);\n"
32857"half4 __ovld __cnfn convert_half4_rte(ushort4);\n"
32858"half4 __ovld __cnfn convert_half4_rte(int4);\n"
32859"half4 __ovld __cnfn convert_half4_rte(uint4);\n"
32860"half4 __ovld __cnfn convert_half4_rte(long4);\n"
32861"half4 __ovld __cnfn convert_half4_rte(ulong4);\n"
32862"half4 __ovld __cnfn convert_half4_rte(float4);\n"
32863"half4 __ovld __cnfn convert_half4_rte(half4);\n"
32864"half4 __ovld __cnfn convert_half4_rtp(char4);\n"
32865"half4 __ovld __cnfn convert_half4_rtp(uchar4);\n"
32866"half4 __ovld __cnfn convert_half4_rtp(short4);\n"
32867"half4 __ovld __cnfn convert_half4_rtp(ushort4);\n"
32868"half4 __ovld __cnfn convert_half4_rtp(int4);\n"
32869"half4 __ovld __cnfn convert_half4_rtp(uint4);\n"
32870"half4 __ovld __cnfn convert_half4_rtp(long4);\n"
32871"half4 __ovld __cnfn convert_half4_rtp(ulong4);\n"
32872"half4 __ovld __cnfn convert_half4_rtp(float4);\n"
32873"half4 __ovld __cnfn convert_half4_rtp(half4);\n"
32874"half4 __ovld __cnfn convert_half4_rtn(char4);\n"
32875"half4 __ovld __cnfn convert_half4_rtn(uchar4);\n"
32876"half4 __ovld __cnfn convert_half4_rtn(short4);\n"
32877"half4 __ovld __cnfn convert_half4_rtn(ushort4);\n"
32878"half4 __ovld __cnfn convert_half4_rtn(int4);\n"
32879"half4 __ovld __cnfn convert_half4_rtn(uint4);\n"
32880"half4 __ovld __cnfn convert_half4_rtn(long4);\n"
32881"half4 __ovld __cnfn convert_half4_rtn(ulong4);\n"
32882"half4 __ovld __cnfn convert_half4_rtn(float4);\n"
32883"half4 __ovld __cnfn convert_half4_rtn(half4);\n"
32884"half4 __ovld __cnfn convert_half4_rtz(char4);\n"
32885"half4 __ovld __cnfn convert_half4_rtz(uchar4);\n"
32886"half4 __ovld __cnfn convert_half4_rtz(short4);\n"
32887"half4 __ovld __cnfn convert_half4_rtz(ushort4);\n"
32888"half4 __ovld __cnfn convert_half4_rtz(int4);\n"
32889"half4 __ovld __cnfn convert_half4_rtz(uint4);\n"
32890"half4 __ovld __cnfn convert_half4_rtz(long4);\n"
32891"half4 __ovld __cnfn convert_half4_rtz(ulong4);\n"
32892"half4 __ovld __cnfn convert_half4_rtz(float4);\n"
32893"half4 __ovld __cnfn convert_half4_rtz(half4);\n"
32894"half8 __ovld __cnfn convert_half8(char8);\n"
32895"half8 __ovld __cnfn convert_half8(uchar8);\n"
32896"half8 __ovld __cnfn convert_half8(short8);\n"
32897"half8 __ovld __cnfn convert_half8(ushort8);\n"
32898"half8 __ovld __cnfn convert_half8(int8);\n"
32899"half8 __ovld __cnfn convert_half8(uint8);\n"
32900"half8 __ovld __cnfn convert_half8(long8);\n"
32901"half8 __ovld __cnfn convert_half8(ulong8);\n"
32902"half8 __ovld __cnfn convert_half8(float8);\n"
32903"half8 __ovld __cnfn convert_half8(half8);\n"
32904"half8 __ovld __cnfn convert_half8_rte(char8);\n"
32905"half8 __ovld __cnfn convert_half8_rte(uchar8);\n"
32906"half8 __ovld __cnfn convert_half8_rte(short8);\n"
32907"half8 __ovld __cnfn convert_half8_rte(ushort8);\n"
32908"half8 __ovld __cnfn convert_half8_rte(int8);\n"
32909"half8 __ovld __cnfn convert_half8_rte(uint8);\n"
32910"half8 __ovld __cnfn convert_half8_rte(long8);\n"
32911"half8 __ovld __cnfn convert_half8_rte(ulong8);\n"
32912"half8 __ovld __cnfn convert_half8_rte(float8);\n"
32913"half8 __ovld __cnfn convert_half8_rte(half8);\n"
32914"half8 __ovld __cnfn convert_half8_rtp(char8);\n"
32915"half8 __ovld __cnfn convert_half8_rtp(uchar8);\n"
32916"half8 __ovld __cnfn convert_half8_rtp(short8);\n"
32917"half8 __ovld __cnfn convert_half8_rtp(ushort8);\n"
32918"half8 __ovld __cnfn convert_half8_rtp(int8);\n"
32919"half8 __ovld __cnfn convert_half8_rtp(uint8);\n"
32920"half8 __ovld __cnfn convert_half8_rtp(long8);\n"
32921"half8 __ovld __cnfn convert_half8_rtp(ulong8);\n"
32922"half8 __ovld __cnfn convert_half8_rtp(float8);\n"
32923"half8 __ovld __cnfn convert_half8_rtp(half8);\n"
32924"half8 __ovld __cnfn convert_half8_rtn(char8);\n"
32925"half8 __ovld __cnfn convert_half8_rtn(uchar8);\n"
32926"half8 __ovld __cnfn convert_half8_rtn(short8);\n"
32927"half8 __ovld __cnfn convert_half8_rtn(ushort8);\n"
32928"half8 __ovld __cnfn convert_half8_rtn(int8);\n"
32929"half8 __ovld __cnfn convert_half8_rtn(uint8);\n"
32930"half8 __ovld __cnfn convert_half8_rtn(long8);\n"
32931"half8 __ovld __cnfn convert_half8_rtn(ulong8);\n"
32932"half8 __ovld __cnfn convert_half8_rtn(float8);\n"
32933"half8 __ovld __cnfn convert_half8_rtn(half8);\n"
32934"half8 __ovld __cnfn convert_half8_rtz(char8);\n"
32935"half8 __ovld __cnfn convert_half8_rtz(uchar8);\n"
32936"half8 __ovld __cnfn convert_half8_rtz(short8);\n"
32937"half8 __ovld __cnfn convert_half8_rtz(ushort8);\n"
32938"half8 __ovld __cnfn convert_half8_rtz(int8);\n"
32939"half8 __ovld __cnfn convert_half8_rtz(uint8);\n"
32940"half8 __ovld __cnfn convert_half8_rtz(long8);\n"
32941"half8 __ovld __cnfn convert_half8_rtz(ulong8);\n"
32942"half8 __ovld __cnfn convert_half8_rtz(float8);\n"
32943"half8 __ovld __cnfn convert_half8_rtz(half8);\n"
32944"half16 __ovld __cnfn convert_half16(char16);\n"
32945"half16 __ovld __cnfn convert_half16(uchar16);\n"
32946"half16 __ovld __cnfn convert_half16(short16);\n"
32947"half16 __ovld __cnfn convert_half16(ushort16);\n"
32948"half16 __ovld __cnfn convert_half16(int16);\n"
32949"half16 __ovld __cnfn convert_half16(uint16);\n"
32950"half16 __ovld __cnfn convert_half16(long16);\n"
32951"half16 __ovld __cnfn convert_half16(ulong16);\n"
32952"half16 __ovld __cnfn convert_half16(float16);\n"
32953"half16 __ovld __cnfn convert_half16(half16);\n"
32954"half16 __ovld __cnfn convert_half16_rte(char16);\n"
32955"half16 __ovld __cnfn convert_half16_rte(uchar16);\n"
32956"half16 __ovld __cnfn convert_half16_rte(short16);\n"
32957"half16 __ovld __cnfn convert_half16_rte(ushort16);\n"
32958"half16 __ovld __cnfn convert_half16_rte(int16);\n"
32959"half16 __ovld __cnfn convert_half16_rte(uint16);\n"
32960"half16 __ovld __cnfn convert_half16_rte(long16);\n"
32961"half16 __ovld __cnfn convert_half16_rte(ulong16);\n"
32962"half16 __ovld __cnfn convert_half16_rte(float16);\n"
32963"half16 __ovld __cnfn convert_half16_rte(half16);\n"
32964"half16 __ovld __cnfn convert_half16_rtp(char16);\n"
32965"half16 __ovld __cnfn convert_half16_rtp(uchar16);\n"
32966"half16 __ovld __cnfn convert_half16_rtp(short16);\n"
32967"half16 __ovld __cnfn convert_half16_rtp(ushort16);\n"
32968"half16 __ovld __cnfn convert_half16_rtp(int16);\n"
32969"half16 __ovld __cnfn convert_half16_rtp(uint16);\n"
32970"half16 __ovld __cnfn convert_half16_rtp(long16);\n"
32971"half16 __ovld __cnfn convert_half16_rtp(ulong16);\n"
32972"half16 __ovld __cnfn convert_half16_rtp(float16);\n"
32973"half16 __ovld __cnfn convert_half16_rtp(half16);\n"
32974"half16 __ovld __cnfn convert_half16_rtn(char16);\n"
32975"half16 __ovld __cnfn convert_half16_rtn(uchar16);\n"
32976"half16 __ovld __cnfn convert_half16_rtn(short16);\n"
32977"half16 __ovld __cnfn convert_half16_rtn(ushort16);\n"
32978"half16 __ovld __cnfn convert_half16_rtn(int16);\n"
32979"half16 __ovld __cnfn convert_half16_rtn(uint16);\n"
32980"half16 __ovld __cnfn convert_half16_rtn(long16);\n"
32981"half16 __ovld __cnfn convert_half16_rtn(ulong16);\n"
32982"half16 __ovld __cnfn convert_half16_rtn(float16);\n"
32983"half16 __ovld __cnfn convert_half16_rtn(half16);\n"
32984"half16 __ovld __cnfn convert_half16_rtz(char16);\n"
32985"half16 __ovld __cnfn convert_half16_rtz(uchar16);\n"
32986"half16 __ovld __cnfn convert_half16_rtz(short16);\n"
32987"half16 __ovld __cnfn convert_half16_rtz(ushort16);\n"
32988"half16 __ovld __cnfn convert_half16_rtz(int16);\n"
32989"half16 __ovld __cnfn convert_half16_rtz(uint16);\n"
32990"half16 __ovld __cnfn convert_half16_rtz(long16);\n"
32991"half16 __ovld __cnfn convert_half16_rtz(ulong16);\n"
32992"half16 __ovld __cnfn convert_half16_rtz(float16);\n"
32993"half16 __ovld __cnfn convert_half16_rtz(half16);\n"
32994"\n"
32995"// Convert half types to double types.\n"
32996"#ifdef cl_khr_fp64\n"
32997"double __ovld __cnfn convert_double(half);\n"
32998"double __ovld __cnfn convert_double_rte(half);\n"
32999"double __ovld __cnfn convert_double_rtp(half);\n"
33000"double __ovld __cnfn convert_double_rtn(half);\n"
33001"double __ovld __cnfn convert_double_rtz(half);\n"
33002"double2 __ovld __cnfn convert_double2(half2);\n"
33003"double2 __ovld __cnfn convert_double2_rte(half2);\n"
33004"double2 __ovld __cnfn convert_double2_rtp(half2);\n"
33005"double2 __ovld __cnfn convert_double2_rtn(half2);\n"
33006"double2 __ovld __cnfn convert_double2_rtz(half2);\n"
33007"double3 __ovld __cnfn convert_double3(half3);\n"
33008"double3 __ovld __cnfn convert_double3_rte(half3);\n"
33009"double3 __ovld __cnfn convert_double3_rtp(half3);\n"
33010"double3 __ovld __cnfn convert_double3_rtn(half3);\n"
33011"double3 __ovld __cnfn convert_double3_rtz(half3);\n"
33012"double4 __ovld __cnfn convert_double4(half4);\n"
33013"double4 __ovld __cnfn convert_double4_rte(half4);\n"
33014"double4 __ovld __cnfn convert_double4_rtp(half4);\n"
33015"double4 __ovld __cnfn convert_double4_rtn(half4);\n"
33016"double4 __ovld __cnfn convert_double4_rtz(half4);\n"
33017"double8 __ovld __cnfn convert_double8(half8);\n"
33018"double8 __ovld __cnfn convert_double8_rte(half8);\n"
33019"double8 __ovld __cnfn convert_double8_rtp(half8);\n"
33020"double8 __ovld __cnfn convert_double8_rtn(half8);\n"
33021"double8 __ovld __cnfn convert_double8_rtz(half8);\n"
33022"double16 __ovld __cnfn convert_double16(half16);\n"
33023"double16 __ovld __cnfn convert_double16_rte(half16);\n"
33024"double16 __ovld __cnfn convert_double16_rtp(half16);\n"
33025"double16 __ovld __cnfn convert_double16_rtn(half16);\n"
33026"double16 __ovld __cnfn convert_double16_rtz(half16);\n"
33027"\n"
33028"// Convert double types to half types.\n"
33029"half __ovld __cnfn convert_half(double);\n"
33030"half __ovld __cnfn convert_half_rte(double);\n"
33031"half __ovld __cnfn convert_half_rtp(double);\n"
33032"half __ovld __cnfn convert_half_rtn(double);\n"
33033"half __ovld __cnfn convert_half_rtz(double);\n"
33034"half2 __ovld __cnfn convert_half2(double2);\n"
33035"half2 __ovld __cnfn convert_half2_rte(double2);\n"
33036"half2 __ovld __cnfn convert_half2_rtp(double2);\n"
33037"half2 __ovld __cnfn convert_half2_rtn(double2);\n"
33038"half2 __ovld __cnfn convert_half2_rtz(double2);\n"
33039"half3 __ovld __cnfn convert_half3(double3);\n"
33040"half3 __ovld __cnfn convert_half3_rte(double3);\n"
33041"half3 __ovld __cnfn convert_half3_rtp(double3);\n"
33042"half3 __ovld __cnfn convert_half3_rtn(double3);\n"
33043"half3 __ovld __cnfn convert_half3_rtz(double3);\n"
33044"half4 __ovld __cnfn convert_half4(double4);\n"
33045"half4 __ovld __cnfn convert_half4_rte(double4);\n"
33046"half4 __ovld __cnfn convert_half4_rtp(double4);\n"
33047"half4 __ovld __cnfn convert_half4_rtn(double4);\n"
33048"half4 __ovld __cnfn convert_half4_rtz(double4);\n"
33049"half8 __ovld __cnfn convert_half8(double8);\n"
33050"half8 __ovld __cnfn convert_half8_rte(double8);\n"
33051"half8 __ovld __cnfn convert_half8_rtp(double8);\n"
33052"half8 __ovld __cnfn convert_half8_rtn(double8);\n"
33053"half8 __ovld __cnfn convert_half8_rtz(double8);\n"
33054"half16 __ovld __cnfn convert_half16(double16);\n"
33055"half16 __ovld __cnfn convert_half16_rte(double16);\n"
33056"half16 __ovld __cnfn convert_half16_rtp(double16);\n"
33057"half16 __ovld __cnfn convert_half16_rtn(double16);\n"
33058"half16 __ovld __cnfn convert_half16_rtz(double16);\n"
33059"#endif //cl_khr_fp64\n"
33060"\n"
33061"#endif // cl_khr_fp16\n"
33062"\n"
33063"/**\n"
33064" * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators\n"
33065" * Reinterprets a data type as another data type of the same size\n"
33066" */\n"
33067"#define as_char(x) __builtin_astype((x), char)\n"
33068"#define as_char2(x) __builtin_astype((x), char2)\n"
33069"#define as_char3(x) __builtin_astype((x), char3)\n"
33070"#define as_char4(x) __builtin_astype((x), char4)\n"
33071"#define as_char8(x) __builtin_astype((x), char8)\n"
33072"#define as_char16(x) __builtin_astype((x), char16)\n"
33073"\n"
33074"#define as_uchar(x) __builtin_astype((x), uchar)\n"
33075"#define as_uchar2(x) __builtin_astype((x), uchar2)\n"
33076"#define as_uchar3(x) __builtin_astype((x), uchar3)\n"
33077"#define as_uchar4(x) __builtin_astype((x), uchar4)\n"
33078"#define as_uchar8(x) __builtin_astype((x), uchar8)\n"
33079"#define as_uchar16(x) __builtin_astype((x), uchar16)\n"
33080"\n"
33081"#define as_short(x) __builtin_astype((x), short)\n"
33082"#define as_short2(x) __builtin_astype((x), short2)\n"
33083"#define as_short3(x) __builtin_astype((x), short3)\n"
33084"#define as_short4(x) __builtin_astype((x), short4)\n"
33085"#define as_short8(x) __builtin_astype((x), short8)\n"
33086"#define as_short16(x) __builtin_astype((x), short16)\n"
33087"\n"
33088"#define as_ushort(x) __builtin_astype((x), ushort)\n"
33089"#define as_ushort2(x) __builtin_astype((x), ushort2)\n"
33090"#define as_ushort3(x) __builtin_astype((x), ushort3)\n"
33091"#define as_ushort4(x) __builtin_astype((x), ushort4)\n"
33092"#define as_ushort8(x) __builtin_astype((x), ushort8)\n"
33093"#define as_ushort16(x) __builtin_astype((x), ushort16)\n"
33094"\n"
33095"#define as_int(x) __builtin_astype((x), int)\n"
33096"#define as_int2(x) __builtin_astype((x), int2)\n"
33097"#define as_int3(x) __builtin_astype((x), int3)\n"
33098"#define as_int4(x) __builtin_astype((x), int4)\n"
33099"#define as_int8(x) __builtin_astype((x), int8)\n"
33100"#define as_int16(x) __builtin_astype((x), int16)\n"
33101"\n"
33102"#define as_uint(x) __builtin_astype((x), uint)\n"
33103"#define as_uint2(x) __builtin_astype((x), uint2)\n"
33104"#define as_uint3(x) __builtin_astype((x), uint3)\n"
33105"#define as_uint4(x) __builtin_astype((x), uint4)\n"
33106"#define as_uint8(x) __builtin_astype((x), uint8)\n"
33107"#define as_uint16(x) __builtin_astype((x), uint16)\n"
33108"\n"
33109"#define as_long(x) __builtin_astype((x), long)\n"
33110"#define as_long2(x) __builtin_astype((x), long2)\n"
33111"#define as_long3(x) __builtin_astype((x), long3)\n"
33112"#define as_long4(x) __builtin_astype((x), long4)\n"
33113"#define as_long8(x) __builtin_astype((x), long8)\n"
33114"#define as_long16(x) __builtin_astype((x), long16)\n"
33115"\n"
33116"#define as_ulong(x) __builtin_astype((x), ulong)\n"
33117"#define as_ulong2(x) __builtin_astype((x), ulong2)\n"
33118"#define as_ulong3(x) __builtin_astype((x), ulong3)\n"
33119"#define as_ulong4(x) __builtin_astype((x), ulong4)\n"
33120"#define as_ulong8(x) __builtin_astype((x), ulong8)\n"
33121"#define as_ulong16(x) __builtin_astype((x), ulong16)\n"
33122"\n"
33123"#define as_float(x) __builtin_astype((x), float)\n"
33124"#define as_float2(x) __builtin_astype((x), float2)\n"
33125"#define as_float3(x) __builtin_astype((x), float3)\n"
33126"#define as_float4(x) __builtin_astype((x), float4)\n"
33127"#define as_float8(x) __builtin_astype((x), float8)\n"
33128"#define as_float16(x) __builtin_astype((x), float16)\n"
33129"\n"
33130"#ifdef cl_khr_fp64\n"
33131"#define as_double(x) __builtin_astype((x), double)\n"
33132"#define as_double2(x) __builtin_astype((x), double2)\n"
33133"#define as_double3(x) __builtin_astype((x), double3)\n"
33134"#define as_double4(x) __builtin_astype((x), double4)\n"
33135"#define as_double8(x) __builtin_astype((x), double8)\n"
33136"#define as_double16(x) __builtin_astype((x), double16)\n"
33137"#endif //cl_khr_fp64\n"
33138"\n"
33139"#ifdef cl_khr_fp16\n"
33140"#define as_half(x) __builtin_astype((x), half)\n"
33141"#define as_half2(x) __builtin_astype((x), half2)\n"
33142"#define as_half3(x) __builtin_astype((x), half3)\n"
33143"#define as_half4(x) __builtin_astype((x), half4)\n"
33144"#define as_half8(x) __builtin_astype((x), half8)\n"
33145"#define as_half16(x) __builtin_astype((x), half16)\n"
33146"#endif //cl_khr_fp16\n"
33147"\n"
33148"// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers\n"
33149"\n"
33150"#define __kernel_exec(X, typen) __kernel \\\n"
33151" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
33152" __attribute__((vec_type_hint(typen)))\n"
33153"\n"
33154"#define kernel_exec(X, typen) __kernel \\\n"
33155" __attribute__((work_group_size_hint(X, 1, 1))) \\\n"
33156" __attribute__((vec_type_hint(typen)))\n"
33157"\n"
33158"// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions\n"
33159"\n"
33160"/**\n"
33161" * Returns the number of dimensions in use. This is the\n"
33162" * value given to the work_dim argument specified in\n"
33163" * clEnqueueNDRangeKernel.\n"
33164" * For clEnqueueTask, this returns 1.\n"
33165" */\n"
33166"uint __ovld __cnfn get_work_dim(void);\n"
33167"\n"
33168"/**\n"
33169" * Returns the number of global work-items specified for\n"
33170" * dimension identified by dimindx. This value is given by\n"
33171" * the global_work_size argument to\n"
33172" * clEnqueueNDRangeKernel. Valid values of dimindx\n"
33173" * are 0 to get_work_dim() - 1. For other values of\n"
33174" * dimindx, get_global_size() returns 1.\n"
33175" * For clEnqueueTask, this always returns 1.\n"
33176" */\n"
33177"size_t __ovld __cnfn get_global_size(uint dimindx);\n"
33178"\n"
33179"/**\n"
33180" * Returns the unique global work-item ID value for\n"
33181" * dimension identified by dimindx. The global work-item\n"
33182" * ID specifies the work-item ID based on the number of\n"
33183" * global work-items specified to execute the kernel. Valid\n"
33184" * values of dimindx are 0 to get_work_dim() - 1. For\n"
33185" * other values of dimindx, get_global_id() returns 0.\n"
33186" * For clEnqueueTask, this returns 0.\n"
33187" */\n"
33188"size_t __ovld __cnfn get_global_id(uint dimindx);\n"
33189"\n"
33190"/**\n"
33191" * Returns the number of local work-items specified in\n"
33192" * dimension identified by dimindx. This value is given by\n"
33193" * the local_work_size argument to\n"
33194" * clEnqueueNDRangeKernel if local_work_size is not\n"
33195" * NULL; otherwise the OpenCL implementation chooses\n"
33196" * an appropriate local_work_size value which is returned\n"
33197" * by this function. Valid values of dimindx are 0 to\n"
33198" * get_work_dim() - 1. For other values of dimindx,\n"
33199" * get_local_size() returns 1.\n"
33200" * For clEnqueueTask, this always returns 1.\n"
33201" */\n"
33202"size_t __ovld __cnfn get_local_size(uint dimindx);\n"
33203"\n"
33204"/**\n"
33205" * Returns the unique local work-item ID i.e. a work-item\n"
33206" * within a specific work-group for dimension identified by\n"
33207" * dimindx. Valid values of dimindx are 0 to\n"
33208" * get_work_dim() - 1. For other values of dimindx,\n"
33209" * get_local_id() returns 0.\n"
33210" * For clEnqueueTask, this returns 0.\n"
33211" */\n"
33212"size_t __ovld __cnfn get_local_id(uint dimindx);\n"
33213"\n"
33214"/**\n"
33215" * Returns the number of work-groups that will execute a\n"
33216" * kernel for dimension identified by dimindx.\n"
33217" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
33218" * For other values of dimindx, get_num_groups () returns\n"
33219" * 1.\n"
33220" * For clEnqueueTask, this always returns 1.\n"
33221" */\n"
33222"size_t __ovld __cnfn get_num_groups(uint dimindx);\n"
33223"\n"
33224"/**\n"
33225" * get_group_id returns the work-group ID which is a\n"
33226" * number from 0 .. get_num_groups(dimindx) - 1.\n"
33227" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
33228" * For other values, get_group_id() returns 0.\n"
33229" * For clEnqueueTask, this returns 0.\n"
33230" */\n"
33231"size_t __ovld __cnfn get_group_id(uint dimindx);\n"
33232"\n"
33233"/**\n"
33234" * get_global_offset returns the offset values specified in\n"
33235" * global_work_offset argument to\n"
33236" * clEnqueueNDRangeKernel.\n"
33237" * Valid values of dimindx are 0 to get_work_dim() - 1.\n"
33238" * For other values, get_global_offset() returns 0.\n"
33239" * For clEnqueueTask, this returns 0.\n"
33240" */\n"
33241"size_t __ovld __cnfn get_global_offset(uint dimindx);\n"
33242"\n"
33243"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
33244"size_t __ovld get_enqueued_local_size(uint dimindx);\n"
33245"size_t __ovld get_global_linear_id(void);\n"
33246"size_t __ovld get_local_linear_id(void);\n"
33247"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
33248"\n"
33249"// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions\n"
33250"\n"
33251"/**\n"
33252" * Arc cosine function.\n"
33253" */\n"
33254"float __ovld __cnfn acos(float);\n"
33255"float2 __ovld __cnfn acos(float2);\n"
33256"float3 __ovld __cnfn acos(float3);\n"
33257"float4 __ovld __cnfn acos(float4);\n"
33258"float8 __ovld __cnfn acos(float8);\n"
33259"float16 __ovld __cnfn acos(float16);\n"
33260"#ifdef cl_khr_fp64\n"
33261"double __ovld __cnfn acos(double);\n"
33262"double2 __ovld __cnfn acos(double2);\n"
33263"double3 __ovld __cnfn acos(double3);\n"
33264"double4 __ovld __cnfn acos(double4);\n"
33265"double8 __ovld __cnfn acos(double8);\n"
33266"double16 __ovld __cnfn acos(double16);\n"
33267"#endif //cl_khr_fp64\n"
33268"#ifdef cl_khr_fp16\n"
33269"half __ovld __cnfn acos(half);\n"
33270"half2 __ovld __cnfn acos(half2);\n"
33271"half3 __ovld __cnfn acos(half3);\n"
33272"half4 __ovld __cnfn acos(half4);\n"
33273"half8 __ovld __cnfn acos(half8);\n"
33274"half16 __ovld __cnfn acos(half16);\n"
33275"#endif //cl_khr_fp16\n"
33276"\n"
33277"/**\n"
33278" * Inverse hyperbolic cosine.\n"
33279" */\n"
33280"float __ovld __cnfn acosh(float);\n"
33281"float2 __ovld __cnfn acosh(float2);\n"
33282"float3 __ovld __cnfn acosh(float3);\n"
33283"float4 __ovld __cnfn acosh(float4);\n"
33284"float8 __ovld __cnfn acosh(float8);\n"
33285"float16 __ovld __cnfn acosh(float16);\n"
33286"#ifdef cl_khr_fp64\n"
33287"double __ovld __cnfn acosh(double);\n"
33288"double2 __ovld __cnfn acosh(double2);\n"
33289"double3 __ovld __cnfn acosh(double3);\n"
33290"double4 __ovld __cnfn acosh(double4);\n"
33291"double8 __ovld __cnfn acosh(double8);\n"
33292"double16 __ovld __cnfn acosh(double16);\n"
33293"#endif //cl_khr_fp64\n"
33294"#ifdef cl_khr_fp16\n"
33295"half __ovld __cnfn acosh(half);\n"
33296"half2 __ovld __cnfn acosh(half2);\n"
33297"half3 __ovld __cnfn acosh(half3);\n"
33298"half4 __ovld __cnfn acosh(half4);\n"
33299"half8 __ovld __cnfn acosh(half8);\n"
33300"half16 __ovld __cnfn acosh(half16);\n"
33301"#endif //cl_khr_fp16\n"
33302"\n"
33303"/**\n"
33304" * Compute acos (x) / PI.\n"
33305" */\n"
33306"float __ovld __cnfn acospi(float x);\n"
33307"float2 __ovld __cnfn acospi(float2 x);\n"
33308"float3 __ovld __cnfn acospi(float3 x);\n"
33309"float4 __ovld __cnfn acospi(float4 x);\n"
33310"float8 __ovld __cnfn acospi(float8 x);\n"
33311"float16 __ovld __cnfn acospi(float16 x);\n"
33312"#ifdef cl_khr_fp64\n"
33313"double __ovld __cnfn acospi(double x);\n"
33314"double2 __ovld __cnfn acospi(double2 x);\n"
33315"double3 __ovld __cnfn acospi(double3 x);\n"
33316"double4 __ovld __cnfn acospi(double4 x);\n"
33317"double8 __ovld __cnfn acospi(double8 x);\n"
33318"double16 __ovld __cnfn acospi(double16 x);\n"
33319"#endif //cl_khr_fp64\n"
33320"#ifdef cl_khr_fp16\n"
33321"half __ovld __cnfn acospi(half x);\n"
33322"half2 __ovld __cnfn acospi(half2 x);\n"
33323"half3 __ovld __cnfn acospi(half3 x);\n"
33324"half4 __ovld __cnfn acospi(half4 x);\n"
33325"half8 __ovld __cnfn acospi(half8 x);\n"
33326"half16 __ovld __cnfn acospi(half16 x);\n"
33327"#endif //cl_khr_fp16\n"
33328"\n"
33329"/**\n"
33330" * Arc sine function.\n"
33331" */\n"
33332"float __ovld __cnfn asin(float);\n"
33333"float2 __ovld __cnfn asin(float2);\n"
33334"float3 __ovld __cnfn asin(float3);\n"
33335"float4 __ovld __cnfn asin(float4);\n"
33336"float8 __ovld __cnfn asin(float8);\n"
33337"float16 __ovld __cnfn asin(float16);\n"
33338"#ifdef cl_khr_fp64\n"
33339"double __ovld __cnfn asin(double);\n"
33340"double2 __ovld __cnfn asin(double2);\n"
33341"double3 __ovld __cnfn asin(double3);\n"
33342"double4 __ovld __cnfn asin(double4);\n"
33343"double8 __ovld __cnfn asin(double8);\n"
33344"double16 __ovld __cnfn asin(double16);\n"
33345"#endif //cl_khr_fp64\n"
33346"#ifdef cl_khr_fp16\n"
33347"half __ovld __cnfn asin(half);\n"
33348"half2 __ovld __cnfn asin(half2);\n"
33349"half3 __ovld __cnfn asin(half3);\n"
33350"half4 __ovld __cnfn asin(half4);\n"
33351"half8 __ovld __cnfn asin(half8);\n"
33352"half16 __ovld __cnfn asin(half16);\n"
33353"#endif //cl_khr_fp16\n"
33354"\n"
33355"/**\n"
33356" * Inverse hyperbolic sine.\n"
33357" */\n"
33358"float __ovld __cnfn asinh(float);\n"
33359"float2 __ovld __cnfn asinh(float2);\n"
33360"float3 __ovld __cnfn asinh(float3);\n"
33361"float4 __ovld __cnfn asinh(float4);\n"
33362"float8 __ovld __cnfn asinh(float8);\n"
33363"float16 __ovld __cnfn asinh(float16);\n"
33364"#ifdef cl_khr_fp64\n"
33365"double __ovld __cnfn asinh(double);\n"
33366"double2 __ovld __cnfn asinh(double2);\n"
33367"double3 __ovld __cnfn asinh(double3);\n"
33368"double4 __ovld __cnfn asinh(double4);\n"
33369"double8 __ovld __cnfn asinh(double8);\n"
33370"double16 __ovld __cnfn asinh(double16);\n"
33371"#endif //cl_khr_fp64\n"
33372"#ifdef cl_khr_fp16\n"
33373"half __ovld __cnfn asinh(half);\n"
33374"half2 __ovld __cnfn asinh(half2);\n"
33375"half3 __ovld __cnfn asinh(half3);\n"
33376"half4 __ovld __cnfn asinh(half4);\n"
33377"half8 __ovld __cnfn asinh(half8);\n"
33378"half16 __ovld __cnfn asinh(half16);\n"
33379"#endif //cl_khr_fp16\n"
33380"\n"
33381"/**\n"
33382" * Compute asin (x) / PI.\n"
33383" */\n"
33384"float __ovld __cnfn asinpi(float x);\n"
33385"float2 __ovld __cnfn asinpi(float2 x);\n"
33386"float3 __ovld __cnfn asinpi(float3 x);\n"
33387"float4 __ovld __cnfn asinpi(float4 x);\n"
33388"float8 __ovld __cnfn asinpi(float8 x);\n"
33389"float16 __ovld __cnfn asinpi(float16 x);\n"
33390"#ifdef cl_khr_fp64\n"
33391"double __ovld __cnfn asinpi(double x);\n"
33392"double2 __ovld __cnfn asinpi(double2 x);\n"
33393"double3 __ovld __cnfn asinpi(double3 x);\n"
33394"double4 __ovld __cnfn asinpi(double4 x);\n"
33395"double8 __ovld __cnfn asinpi(double8 x);\n"
33396"double16 __ovld __cnfn asinpi(double16 x);\n"
33397"#endif //cl_khr_fp64\n"
33398"#ifdef cl_khr_fp16\n"
33399"half __ovld __cnfn asinpi(half x);\n"
33400"half2 __ovld __cnfn asinpi(half2 x);\n"
33401"half3 __ovld __cnfn asinpi(half3 x);\n"
33402"half4 __ovld __cnfn asinpi(half4 x);\n"
33403"half8 __ovld __cnfn asinpi(half8 x);\n"
33404"half16 __ovld __cnfn asinpi(half16 x);\n"
33405"#endif //cl_khr_fp16\n"
33406"\n"
33407"/**\n"
33408" * Arc tangent function.\n"
33409" */\n"
33410"float __ovld __cnfn atan(float y_over_x);\n"
33411"float2 __ovld __cnfn atan(float2 y_over_x);\n"
33412"float3 __ovld __cnfn atan(float3 y_over_x);\n"
33413"float4 __ovld __cnfn atan(float4 y_over_x);\n"
33414"float8 __ovld __cnfn atan(float8 y_over_x);\n"
33415"float16 __ovld __cnfn atan(float16 y_over_x);\n"
33416"#ifdef cl_khr_fp64\n"
33417"double __ovld __cnfn atan(double y_over_x);\n"
33418"double2 __ovld __cnfn atan(double2 y_over_x);\n"
33419"double3 __ovld __cnfn atan(double3 y_over_x);\n"
33420"double4 __ovld __cnfn atan(double4 y_over_x);\n"
33421"double8 __ovld __cnfn atan(double8 y_over_x);\n"
33422"double16 __ovld __cnfn atan(double16 y_over_x);\n"
33423"#endif //cl_khr_fp64\n"
33424"#ifdef cl_khr_fp16\n"
33425"half __ovld __cnfn atan(half y_over_x);\n"
33426"half2 __ovld __cnfn atan(half2 y_over_x);\n"
33427"half3 __ovld __cnfn atan(half3 y_over_x);\n"
33428"half4 __ovld __cnfn atan(half4 y_over_x);\n"
33429"half8 __ovld __cnfn atan(half8 y_over_x);\n"
33430"half16 __ovld __cnfn atan(half16 y_over_x);\n"
33431"#endif //cl_khr_fp16\n"
33432"\n"
33433"/**\n"
33434" * Arc tangent of y / x.\n"
33435" */\n"
33436"float __ovld __cnfn atan2(float y, float x);\n"
33437"float2 __ovld __cnfn atan2(float2 y, float2 x);\n"
33438"float3 __ovld __cnfn atan2(float3 y, float3 x);\n"
33439"float4 __ovld __cnfn atan2(float4 y, float4 x);\n"
33440"float8 __ovld __cnfn atan2(float8 y, float8 x);\n"
33441"float16 __ovld __cnfn atan2(float16 y, float16 x);\n"
33442"#ifdef cl_khr_fp64\n"
33443"double __ovld __cnfn atan2(double y, double x);\n"
33444"double2 __ovld __cnfn atan2(double2 y, double2 x);\n"
33445"double3 __ovld __cnfn atan2(double3 y, double3 x);\n"
33446"double4 __ovld __cnfn atan2(double4 y, double4 x);\n"
33447"double8 __ovld __cnfn atan2(double8 y, double8 x);\n"
33448"double16 __ovld __cnfn atan2(double16 y, double16 x);\n"
33449"#endif //cl_khr_fp64\n"
33450"#ifdef cl_khr_fp16\n"
33451"half __ovld __cnfn atan2(half y, half x);\n"
33452"half2 __ovld __cnfn atan2(half2 y, half2 x);\n"
33453"half3 __ovld __cnfn atan2(half3 y, half3 x);\n"
33454"half4 __ovld __cnfn atan2(half4 y, half4 x);\n"
33455"half8 __ovld __cnfn atan2(half8 y, half8 x);\n"
33456"half16 __ovld __cnfn atan2(half16 y, half16 x);\n"
33457"#endif //cl_khr_fp16\n"
33458"\n"
33459"/**\n"
33460" * Hyperbolic arc tangent.\n"
33461" */\n"
33462"float __ovld __cnfn atanh(float);\n"
33463"float2 __ovld __cnfn atanh(float2);\n"
33464"float3 __ovld __cnfn atanh(float3);\n"
33465"float4 __ovld __cnfn atanh(float4);\n"
33466"float8 __ovld __cnfn atanh(float8);\n"
33467"float16 __ovld __cnfn atanh(float16);\n"
33468"#ifdef cl_khr_fp64\n"
33469"double __ovld __cnfn atanh(double);\n"
33470"double2 __ovld __cnfn atanh(double2);\n"
33471"double3 __ovld __cnfn atanh(double3);\n"
33472"double4 __ovld __cnfn atanh(double4);\n"
33473"double8 __ovld __cnfn atanh(double8);\n"
33474"double16 __ovld __cnfn atanh(double16);\n"
33475"#endif //cl_khr_fp64\n"
33476"#ifdef cl_khr_fp16\n"
33477"half __ovld __cnfn atanh(half);\n"
33478"half2 __ovld __cnfn atanh(half2);\n"
33479"half3 __ovld __cnfn atanh(half3);\n"
33480"half4 __ovld __cnfn atanh(half4);\n"
33481"half8 __ovld __cnfn atanh(half8);\n"
33482"half16 __ovld __cnfn atanh(half16);\n"
33483"#endif //cl_khr_fp16\n"
33484"\n"
33485"/**\n"
33486" * Compute atan (x) / PI.\n"
33487" */\n"
33488"float __ovld __cnfn atanpi(float x);\n"
33489"float2 __ovld __cnfn atanpi(float2 x);\n"
33490"float3 __ovld __cnfn atanpi(float3 x);\n"
33491"float4 __ovld __cnfn atanpi(float4 x);\n"
33492"float8 __ovld __cnfn atanpi(float8 x);\n"
33493"float16 __ovld __cnfn atanpi(float16 x);\n"
33494"#ifdef cl_khr_fp64\n"
33495"double __ovld __cnfn atanpi(double x);\n"
33496"double2 __ovld __cnfn atanpi(double2 x);\n"
33497"double3 __ovld __cnfn atanpi(double3 x);\n"
33498"double4 __ovld __cnfn atanpi(double4 x);\n"
33499"double8 __ovld __cnfn atanpi(double8 x);\n"
33500"double16 __ovld __cnfn atanpi(double16 x);\n"
33501"#endif //cl_khr_fp64\n"
33502"#ifdef cl_khr_fp16\n"
33503"half __ovld __cnfn atanpi(half x);\n"
33504"half2 __ovld __cnfn atanpi(half2 x);\n"
33505"half3 __ovld __cnfn atanpi(half3 x);\n"
33506"half4 __ovld __cnfn atanpi(half4 x);\n"
33507"half8 __ovld __cnfn atanpi(half8 x);\n"
33508"half16 __ovld __cnfn atanpi(half16 x);\n"
33509"#endif //cl_khr_fp16\n"
33510"\n"
33511"/**\n"
33512" * Compute atan2 (y, x) / PI.\n"
33513" */\n"
33514"float __ovld __cnfn atan2pi(float y, float x);\n"
33515"float2 __ovld __cnfn atan2pi(float2 y, float2 x);\n"
33516"float3 __ovld __cnfn atan2pi(float3 y, float3 x);\n"
33517"float4 __ovld __cnfn atan2pi(float4 y, float4 x);\n"
33518"float8 __ovld __cnfn atan2pi(float8 y, float8 x);\n"
33519"float16 __ovld __cnfn atan2pi(float16 y, float16 x);\n"
33520"#ifdef cl_khr_fp64\n"
33521"double __ovld __cnfn atan2pi(double y, double x);\n"
33522"double2 __ovld __cnfn atan2pi(double2 y, double2 x);\n"
33523"double3 __ovld __cnfn atan2pi(double3 y, double3 x);\n"
33524"double4 __ovld __cnfn atan2pi(double4 y, double4 x);\n"
33525"double8 __ovld __cnfn atan2pi(double8 y, double8 x);\n"
33526"double16 __ovld __cnfn atan2pi(double16 y, double16 x);\n"
33527"#endif //cl_khr_fp64\n"
33528"#ifdef cl_khr_fp16\n"
33529"half __ovld __cnfn atan2pi(half y, half x);\n"
33530"half2 __ovld __cnfn atan2pi(half2 y, half2 x);\n"
33531"half3 __ovld __cnfn atan2pi(half3 y, half3 x);\n"
33532"half4 __ovld __cnfn atan2pi(half4 y, half4 x);\n"
33533"half8 __ovld __cnfn atan2pi(half8 y, half8 x);\n"
33534"half16 __ovld __cnfn atan2pi(half16 y, half16 x);\n"
33535"#endif //cl_khr_fp16\n"
33536"\n"
33537"/**\n"
33538" * Compute cube-root.\n"
33539" */\n"
33540"float __ovld __cnfn cbrt(float);\n"
33541"float2 __ovld __cnfn cbrt(float2);\n"
33542"float3 __ovld __cnfn cbrt(float3);\n"
33543"float4 __ovld __cnfn cbrt(float4);\n"
33544"float8 __ovld __cnfn cbrt(float8);\n"
33545"float16 __ovld __cnfn cbrt(float16);\n"
33546"#ifdef cl_khr_fp64\n"
33547"double __ovld __cnfn cbrt(double);\n"
33548"double2 __ovld __cnfn cbrt(double2);\n"
33549"double3 __ovld __cnfn cbrt(double3);\n"
33550"double4 __ovld __cnfn cbrt(double4);\n"
33551"double8 __ovld __cnfn cbrt(double8);\n"
33552"double16 __ovld __cnfn cbrt(double16);\n"
33553"#endif //cl_khr_fp64\n"
33554"#ifdef cl_khr_fp16\n"
33555"half __ovld __cnfn cbrt(half);\n"
33556"half2 __ovld __cnfn cbrt(half2);\n"
33557"half3 __ovld __cnfn cbrt(half3);\n"
33558"half4 __ovld __cnfn cbrt(half4);\n"
33559"half8 __ovld __cnfn cbrt(half8);\n"
33560"half16 __ovld __cnfn cbrt(half16);\n"
33561"#endif //cl_khr_fp16\n"
33562"\n"
33563"/**\n"
33564" * Round to integral value using the round to positive\n"
33565" * infinity rounding mode.\n"
33566" */\n"
33567"float __ovld __cnfn ceil(float);\n"
33568"float2 __ovld __cnfn ceil(float2);\n"
33569"float3 __ovld __cnfn ceil(float3);\n"
33570"float4 __ovld __cnfn ceil(float4);\n"
33571"float8 __ovld __cnfn ceil(float8);\n"
33572"float16 __ovld __cnfn ceil(float16);\n"
33573"#ifdef cl_khr_fp64\n"
33574"double __ovld __cnfn ceil(double);\n"
33575"double2 __ovld __cnfn ceil(double2);\n"
33576"double3 __ovld __cnfn ceil(double3);\n"
33577"double4 __ovld __cnfn ceil(double4);\n"
33578"double8 __ovld __cnfn ceil(double8);\n"
33579"double16 __ovld __cnfn ceil(double16);\n"
33580"#endif //cl_khr_fp64\n"
33581"#ifdef cl_khr_fp16\n"
33582"half __ovld __cnfn ceil(half);\n"
33583"half2 __ovld __cnfn ceil(half2);\n"
33584"half3 __ovld __cnfn ceil(half3);\n"
33585"half4 __ovld __cnfn ceil(half4);\n"
33586"half8 __ovld __cnfn ceil(half8);\n"
33587"half16 __ovld __cnfn ceil(half16);\n"
33588"#endif //cl_khr_fp16\n"
33589"\n"
33590"/**\n"
33591" * Returns x with its sign changed to match the sign of y.\n"
33592" */\n"
33593"float __ovld __cnfn copysign(float x, float y);\n"
33594"float2 __ovld __cnfn copysign(float2 x, float2 y);\n"
33595"float3 __ovld __cnfn copysign(float3 x, float3 y);\n"
33596"float4 __ovld __cnfn copysign(float4 x, float4 y);\n"
33597"float8 __ovld __cnfn copysign(float8 x, float8 y);\n"
33598"float16 __ovld __cnfn copysign(float16 x, float16 y);\n"
33599"#ifdef cl_khr_fp64\n"
33600"double __ovld __cnfn copysign(double x, double y);\n"
33601"double2 __ovld __cnfn copysign(double2 x, double2 y);\n"
33602"double3 __ovld __cnfn copysign(double3 x, double3 y);\n"
33603"double4 __ovld __cnfn copysign(double4 x, double4 y);\n"
33604"double8 __ovld __cnfn copysign(double8 x, double8 y);\n"
33605"double16 __ovld __cnfn copysign(double16 x, double16 y);\n"
33606"#endif //cl_khr_fp64\n"
33607"#ifdef cl_khr_fp16\n"
33608"half __ovld __cnfn copysign(half x, half y);\n"
33609"half2 __ovld __cnfn copysign(half2 x, half2 y);\n"
33610"half3 __ovld __cnfn copysign(half3 x, half3 y);\n"
33611"half4 __ovld __cnfn copysign(half4 x, half4 y);\n"
33612"half8 __ovld __cnfn copysign(half8 x, half8 y);\n"
33613"half16 __ovld __cnfn copysign(half16 x, half16 y);\n"
33614"#endif //cl_khr_fp16\n"
33615"\n"
33616"/**\n"
33617" * Compute cosine.\n"
33618" */\n"
33619"float __ovld __cnfn cos(float);\n"
33620"float2 __ovld __cnfn cos(float2);\n"
33621"float3 __ovld __cnfn cos(float3);\n"
33622"float4 __ovld __cnfn cos(float4);\n"
33623"float8 __ovld __cnfn cos(float8);\n"
33624"float16 __ovld __cnfn cos(float16);\n"
33625"#ifdef cl_khr_fp64\n"
33626"double __ovld __cnfn cos(double);\n"
33627"double2 __ovld __cnfn cos(double2);\n"
33628"double3 __ovld __cnfn cos(double3);\n"
33629"double4 __ovld __cnfn cos(double4);\n"
33630"double8 __ovld __cnfn cos(double8);\n"
33631"double16 __ovld __cnfn cos(double16);\n"
33632"#endif //cl_khr_fp64\n"
33633"#ifdef cl_khr_fp16\n"
33634"half __ovld __cnfn cos(half);\n"
33635"half2 __ovld __cnfn cos(half2);\n"
33636"half3 __ovld __cnfn cos(half3);\n"
33637"half4 __ovld __cnfn cos(half4);\n"
33638"half8 __ovld __cnfn cos(half8);\n"
33639"half16 __ovld __cnfn cos(half16);\n"
33640"#endif //cl_khr_fp16\n"
33641"\n"
33642"/**\n"
33643" * Compute hyperbolic cosine.\n"
33644" */\n"
33645"float __ovld __cnfn cosh(float);\n"
33646"float2 __ovld __cnfn cosh(float2);\n"
33647"float3 __ovld __cnfn cosh(float3);\n"
33648"float4 __ovld __cnfn cosh(float4);\n"
33649"float8 __ovld __cnfn cosh(float8);\n"
33650"float16 __ovld __cnfn cosh(float16);\n"
33651"#ifdef cl_khr_fp64\n"
33652"double __ovld __cnfn cosh(double);\n"
33653"double2 __ovld __cnfn cosh(double2);\n"
33654"double3 __ovld __cnfn cosh(double3);\n"
33655"double4 __ovld __cnfn cosh(double4);\n"
33656"double8 __ovld __cnfn cosh(double8);\n"
33657"double16 __ovld __cnfn cosh(double16);\n"
33658"#endif //cl_khr_fp64\n"
33659"#ifdef cl_khr_fp16\n"
33660"half __ovld __cnfn cosh(half);\n"
33661"half2 __ovld __cnfn cosh(half2);\n"
33662"half3 __ovld __cnfn cosh(half3);\n"
33663"half4 __ovld __cnfn cosh(half4);\n"
33664"half8 __ovld __cnfn cosh(half8);\n"
33665"half16 __ovld __cnfn cosh(half16);\n"
33666"#endif //cl_khr_fp16\n"
33667"\n"
33668"/**\n"
33669" * Compute cos (PI * x).\n"
33670" */\n"
33671"float __ovld __cnfn cospi(float x);\n"
33672"float2 __ovld __cnfn cospi(float2 x);\n"
33673"float3 __ovld __cnfn cospi(float3 x);\n"
33674"float4 __ovld __cnfn cospi(float4 x);\n"
33675"float8 __ovld __cnfn cospi(float8 x);\n"
33676"float16 __ovld __cnfn cospi(float16 x);\n"
33677"#ifdef cl_khr_fp64\n"
33678"double __ovld __cnfn cospi(double x);\n"
33679"double2 __ovld __cnfn cospi(double2 x);\n"
33680"double3 __ovld __cnfn cospi(double3 x);\n"
33681"double4 __ovld __cnfn cospi(double4 x);\n"
33682"double8 __ovld __cnfn cospi(double8 x);\n"
33683"double16 __ovld __cnfn cospi(double16 x);\n"
33684"#endif //cl_khr_fp64\n"
33685"#ifdef cl_khr_fp16\n"
33686"half __ovld __cnfn cospi(half x);\n"
33687"half2 __ovld __cnfn cospi(half2 x);\n"
33688"half3 __ovld __cnfn cospi(half3 x);\n"
33689"half4 __ovld __cnfn cospi(half4 x);\n"
33690"half8 __ovld __cnfn cospi(half8 x);\n"
33691"half16 __ovld __cnfn cospi(half16 x);\n"
33692"#endif //cl_khr_fp16\n"
33693"\n"
33694"/**\n"
33695" * Complementary error function.\n"
33696" */\n"
33697"float __ovld __cnfn erfc(float);\n"
33698"float2 __ovld __cnfn erfc(float2);\n"
33699"float3 __ovld __cnfn erfc(float3);\n"
33700"float4 __ovld __cnfn erfc(float4);\n"
33701"float8 __ovld __cnfn erfc(float8);\n"
33702"float16 __ovld __cnfn erfc(float16);\n"
33703"#ifdef cl_khr_fp64\n"
33704"double __ovld __cnfn erfc(double);\n"
33705"double2 __ovld __cnfn erfc(double2);\n"
33706"double3 __ovld __cnfn erfc(double3);\n"
33707"double4 __ovld __cnfn erfc(double4);\n"
33708"double8 __ovld __cnfn erfc(double8);\n"
33709"double16 __ovld __cnfn erfc(double16);\n"
33710"#endif //cl_khr_fp64\n"
33711"#ifdef cl_khr_fp16\n"
33712"half __ovld __cnfn erfc(half);\n"
33713"half2 __ovld __cnfn erfc(half2);\n"
33714"half3 __ovld __cnfn erfc(half3);\n"
33715"half4 __ovld __cnfn erfc(half4);\n"
33716"half8 __ovld __cnfn erfc(half8);\n"
33717"half16 __ovld __cnfn erfc(half16);\n"
33718"#endif //cl_khr_fp16\n"
33719"\n"
33720"/**\n"
33721" * Error function encountered in integrating the\n"
33722" * normal distribution.\n"
33723" */\n"
33724"float __ovld __cnfn erf(float);\n"
33725"float2 __ovld __cnfn erf(float2);\n"
33726"float3 __ovld __cnfn erf(float3);\n"
33727"float4 __ovld __cnfn erf(float4);\n"
33728"float8 __ovld __cnfn erf(float8);\n"
33729"float16 __ovld __cnfn erf(float16);\n"
33730"#ifdef cl_khr_fp64\n"
33731"double __ovld __cnfn erf(double);\n"
33732"double2 __ovld __cnfn erf(double2);\n"
33733"double3 __ovld __cnfn erf(double3);\n"
33734"double4 __ovld __cnfn erf(double4);\n"
33735"double8 __ovld __cnfn erf(double8);\n"
33736"double16 __ovld __cnfn erf(double16);\n"
33737"#endif //cl_khr_fp64\n"
33738"#ifdef cl_khr_fp16\n"
33739"half __ovld __cnfn erf(half);\n"
33740"half2 __ovld __cnfn erf(half2);\n"
33741"half3 __ovld __cnfn erf(half3);\n"
33742"half4 __ovld __cnfn erf(half4);\n"
33743"half8 __ovld __cnfn erf(half8);\n"
33744"half16 __ovld __cnfn erf(half16);\n"
33745"#endif //cl_khr_fp16\n"
33746"\n"
33747"/**\n"
33748" * Compute the base e exponential function of x.\n"
33749" */\n"
33750"float __ovld __cnfn exp(float x);\n"
33751"float2 __ovld __cnfn exp(float2 x);\n"
33752"float3 __ovld __cnfn exp(float3 x);\n"
33753"float4 __ovld __cnfn exp(float4 x);\n"
33754"float8 __ovld __cnfn exp(float8 x);\n"
33755"float16 __ovld __cnfn exp(float16 x);\n"
33756"#ifdef cl_khr_fp64\n"
33757"double __ovld __cnfn exp(double x);\n"
33758"double2 __ovld __cnfn exp(double2 x);\n"
33759"double3 __ovld __cnfn exp(double3 x);\n"
33760"double4 __ovld __cnfn exp(double4 x);\n"
33761"double8 __ovld __cnfn exp(double8 x);\n"
33762"double16 __ovld __cnfn exp(double16 x);\n"
33763"#endif //cl_khr_fp64\n"
33764"#ifdef cl_khr_fp16\n"
33765"half __ovld __cnfn exp(half x);\n"
33766"half2 __ovld __cnfn exp(half2 x);\n"
33767"half3 __ovld __cnfn exp(half3 x);\n"
33768"half4 __ovld __cnfn exp(half4 x);\n"
33769"half8 __ovld __cnfn exp(half8 x);\n"
33770"half16 __ovld __cnfn exp(half16 x);\n"
33771"#endif //cl_khr_fp16\n"
33772"\n"
33773"/**\n"
33774" * Exponential base 2 function.\n"
33775" */\n"
33776"float __ovld __cnfn exp2(float);\n"
33777"float2 __ovld __cnfn exp2(float2);\n"
33778"float3 __ovld __cnfn exp2(float3);\n"
33779"float4 __ovld __cnfn exp2(float4);\n"
33780"float8 __ovld __cnfn exp2(float8);\n"
33781"float16 __ovld __cnfn exp2(float16);\n"
33782"#ifdef cl_khr_fp64\n"
33783"double __ovld __cnfn exp2(double);\n"
33784"double2 __ovld __cnfn exp2(double2);\n"
33785"double3 __ovld __cnfn exp2(double3);\n"
33786"double4 __ovld __cnfn exp2(double4);\n"
33787"double8 __ovld __cnfn exp2(double8);\n"
33788"double16 __ovld __cnfn exp2(double16);\n"
33789"#endif //cl_khr_fp64\n"
33790"#ifdef cl_khr_fp16\n"
33791"half __ovld __cnfn exp2(half);\n"
33792"half2 __ovld __cnfn exp2(half2);\n"
33793"half3 __ovld __cnfn exp2(half3);\n"
33794"half4 __ovld __cnfn exp2(half4);\n"
33795"half8 __ovld __cnfn exp2(half8);\n"
33796"half16 __ovld __cnfn exp2(half16);\n"
33797"#endif //cl_khr_fp16\n"
33798"\n"
33799"/**\n"
33800" * Exponential base 10 function.\n"
33801" */\n"
33802"float __ovld __cnfn exp10(float);\n"
33803"float2 __ovld __cnfn exp10(float2);\n"
33804"float3 __ovld __cnfn exp10(float3);\n"
33805"float4 __ovld __cnfn exp10(float4);\n"
33806"float8 __ovld __cnfn exp10(float8);\n"
33807"float16 __ovld __cnfn exp10(float16);\n"
33808"#ifdef cl_khr_fp64\n"
33809"double __ovld __cnfn exp10(double);\n"
33810"double2 __ovld __cnfn exp10(double2);\n"
33811"double3 __ovld __cnfn exp10(double3);\n"
33812"double4 __ovld __cnfn exp10(double4);\n"
33813"double8 __ovld __cnfn exp10(double8);\n"
33814"double16 __ovld __cnfn exp10(double16);\n"
33815"#endif //cl_khr_fp64\n"
33816"#ifdef cl_khr_fp16\n"
33817"half __ovld __cnfn exp10(half);\n"
33818"half2 __ovld __cnfn exp10(half2);\n"
33819"half3 __ovld __cnfn exp10(half3);\n"
33820"half4 __ovld __cnfn exp10(half4);\n"
33821"half8 __ovld __cnfn exp10(half8);\n"
33822"half16 __ovld __cnfn exp10(half16);\n"
33823"#endif //cl_khr_fp16\n"
33824"\n"
33825"/**\n"
33826" * Compute e^x- 1.0.\n"
33827" */\n"
33828"float __ovld __cnfn expm1(float x);\n"
33829"float2 __ovld __cnfn expm1(float2 x);\n"
33830"float3 __ovld __cnfn expm1(float3 x);\n"
33831"float4 __ovld __cnfn expm1(float4 x);\n"
33832"float8 __ovld __cnfn expm1(float8 x);\n"
33833"float16 __ovld __cnfn expm1(float16 x);\n"
33834"#ifdef cl_khr_fp64\n"
33835"double __ovld __cnfn expm1(double x);\n"
33836"double2 __ovld __cnfn expm1(double2 x);\n"
33837"double3 __ovld __cnfn expm1(double3 x);\n"
33838"double4 __ovld __cnfn expm1(double4 x);\n"
33839"double8 __ovld __cnfn expm1(double8 x);\n"
33840"double16 __ovld __cnfn expm1(double16 x);\n"
33841"#endif //cl_khr_fp64\n"
33842"#ifdef cl_khr_fp16\n"
33843"half __ovld __cnfn expm1(half x);\n"
33844"half2 __ovld __cnfn expm1(half2 x);\n"
33845"half3 __ovld __cnfn expm1(half3 x);\n"
33846"half4 __ovld __cnfn expm1(half4 x);\n"
33847"half8 __ovld __cnfn expm1(half8 x);\n"
33848"half16 __ovld __cnfn expm1(half16 x);\n"
33849"#endif //cl_khr_fp16\n"
33850"\n"
33851"/**\n"
33852" * Compute absolute value of a floating-point number.\n"
33853" */\n"
33854"float __ovld __cnfn fabs(float);\n"
33855"float2 __ovld __cnfn fabs(float2);\n"
33856"float3 __ovld __cnfn fabs(float3);\n"
33857"float4 __ovld __cnfn fabs(float4);\n"
33858"float8 __ovld __cnfn fabs(float8);\n"
33859"float16 __ovld __cnfn fabs(float16);\n"
33860"#ifdef cl_khr_fp64\n"
33861"double __ovld __cnfn fabs(double);\n"
33862"double2 __ovld __cnfn fabs(double2);\n"
33863"double3 __ovld __cnfn fabs(double3);\n"
33864"double4 __ovld __cnfn fabs(double4);\n"
33865"double8 __ovld __cnfn fabs(double8);\n"
33866"double16 __ovld __cnfn fabs(double16);\n"
33867"#endif //cl_khr_fp64\n"
33868"#ifdef cl_khr_fp16\n"
33869"half __ovld __cnfn fabs(half);\n"
33870"half2 __ovld __cnfn fabs(half2);\n"
33871"half3 __ovld __cnfn fabs(half3);\n"
33872"half4 __ovld __cnfn fabs(half4);\n"
33873"half8 __ovld __cnfn fabs(half8);\n"
33874"half16 __ovld __cnfn fabs(half16);\n"
33875"#endif //cl_khr_fp16\n"
33876"\n"
33877"/**\n"
33878" * x - y if x > y, +0 if x is less than or equal to y.\n"
33879" */\n"
33880"float __ovld __cnfn fdim(float x, float y);\n"
33881"float2 __ovld __cnfn fdim(float2 x, float2 y);\n"
33882"float3 __ovld __cnfn fdim(float3 x, float3 y);\n"
33883"float4 __ovld __cnfn fdim(float4 x, float4 y);\n"
33884"float8 __ovld __cnfn fdim(float8 x, float8 y);\n"
33885"float16 __ovld __cnfn fdim(float16 x, float16 y);\n"
33886"#ifdef cl_khr_fp64\n"
33887"double __ovld __cnfn fdim(double x, double y);\n"
33888"double2 __ovld __cnfn fdim(double2 x, double2 y);\n"
33889"double3 __ovld __cnfn fdim(double3 x, double3 y);\n"
33890"double4 __ovld __cnfn fdim(double4 x, double4 y);\n"
33891"double8 __ovld __cnfn fdim(double8 x, double8 y);\n"
33892"double16 __ovld __cnfn fdim(double16 x, double16 y);\n"
33893"#endif //cl_khr_fp64\n"
33894"#ifdef cl_khr_fp16\n"
33895"half __ovld __cnfn fdim(half x, half y);\n"
33896"half2 __ovld __cnfn fdim(half2 x, half2 y);\n"
33897"half3 __ovld __cnfn fdim(half3 x, half3 y);\n"
33898"half4 __ovld __cnfn fdim(half4 x, half4 y);\n"
33899"half8 __ovld __cnfn fdim(half8 x, half8 y);\n"
33900"half16 __ovld __cnfn fdim(half16 x, half16 y);\n"
33901"#endif //cl_khr_fp16\n"
33902"\n"
33903"/**\n"
33904" * Round to integral value using the round to -ve\n"
33905" * infinity rounding mode.\n"
33906" */\n"
33907"float __ovld __cnfn floor(float);\n"
33908"float2 __ovld __cnfn floor(float2);\n"
33909"float3 __ovld __cnfn floor(float3);\n"
33910"float4 __ovld __cnfn floor(float4);\n"
33911"float8 __ovld __cnfn floor(float8);\n"
33912"float16 __ovld __cnfn floor(float16);\n"
33913"#ifdef cl_khr_fp64\n"
33914"double __ovld __cnfn floor(double);\n"
33915"double2 __ovld __cnfn floor(double2);\n"
33916"double3 __ovld __cnfn floor(double3);\n"
33917"double4 __ovld __cnfn floor(double4);\n"
33918"double8 __ovld __cnfn floor(double8);\n"
33919"double16 __ovld __cnfn floor(double16);\n"
33920"#endif //cl_khr_fp64\n"
33921"#ifdef cl_khr_fp16\n"
33922"half __ovld __cnfn floor(half);\n"
33923"half2 __ovld __cnfn floor(half2);\n"
33924"half3 __ovld __cnfn floor(half3);\n"
33925"half4 __ovld __cnfn floor(half4);\n"
33926"half8 __ovld __cnfn floor(half8);\n"
33927"half16 __ovld __cnfn floor(half16);\n"
33928"#endif //cl_khr_fp16\n"
33929"\n"
33930"/**\n"
33931" * Returns the correctly rounded floating-point\n"
33932" * representation of the sum of c with the infinitely\n"
33933" * precise product of a and b. Rounding of\n"
33934" * intermediate products shall not occur. Edge case\n"
33935" * behavior is per the IEEE 754-2008 standard.\n"
33936" */\n"
33937"float __ovld __cnfn fma(float a, float b, float c);\n"
33938"float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);\n"
33939"float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);\n"
33940"float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);\n"
33941"float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);\n"
33942"float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);\n"
33943"#ifdef cl_khr_fp64\n"
33944"double __ovld __cnfn fma(double a, double b, double c);\n"
33945"double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);\n"
33946"double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);\n"
33947"double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);\n"
33948"double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);\n"
33949"double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);\n"
33950"#endif //cl_khr_fp64\n"
33951"#ifdef cl_khr_fp16\n"
33952"half __ovld __cnfn fma(half a, half b, half c);\n"
33953"half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);\n"
33954"half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);\n"
33955"half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);\n"
33956"half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);\n"
33957"half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);\n"
33958"#endif //cl_khr_fp16\n"
33959"\n"
33960"/**\n"
33961" * Returns y if x < y, otherwise it returns x. If one\n"
33962" * argument is a NaN, fmax() returns the other\n"
33963" * argument. If both arguments are NaNs, fmax()\n"
33964" * returns a NaN.\n"
33965" */\n"
33966"float __ovld __cnfn fmax(float x, float y);\n"
33967"float2 __ovld __cnfn fmax(float2 x, float2 y);\n"
33968"float3 __ovld __cnfn fmax(float3 x, float3 y);\n"
33969"float4 __ovld __cnfn fmax(float4 x, float4 y);\n"
33970"float8 __ovld __cnfn fmax(float8 x, float8 y);\n"
33971"float16 __ovld __cnfn fmax(float16 x, float16 y);\n"
33972"float2 __ovld __cnfn fmax(float2 x, float y);\n"
33973"float3 __ovld __cnfn fmax(float3 x, float y);\n"
33974"float4 __ovld __cnfn fmax(float4 x, float y);\n"
33975"float8 __ovld __cnfn fmax(float8 x, float y);\n"
33976"float16 __ovld __cnfn fmax(float16 x, float y);\n"
33977"#ifdef cl_khr_fp64\n"
33978"double __ovld __cnfn fmax(double x, double y);\n"
33979"double2 __ovld __cnfn fmax(double2 x, double2 y);\n"
33980"double3 __ovld __cnfn fmax(double3 x, double3 y);\n"
33981"double4 __ovld __cnfn fmax(double4 x, double4 y);\n"
33982"double8 __ovld __cnfn fmax(double8 x, double8 y);\n"
33983"double16 __ovld __cnfn fmax(double16 x, double16 y);\n"
33984"double2 __ovld __cnfn fmax(double2 x, double y);\n"
33985"double3 __ovld __cnfn fmax(double3 x, double y);\n"
33986"double4 __ovld __cnfn fmax(double4 x, double y);\n"
33987"double8 __ovld __cnfn fmax(double8 x, double y);\n"
33988"double16 __ovld __cnfn fmax(double16 x, double y);\n"
33989"#endif //cl_khr_fp64\n"
33990"#ifdef cl_khr_fp16\n"
33991"half __ovld __cnfn fmax(half x, half y);\n"
33992"half2 __ovld __cnfn fmax(half2 x, half2 y);\n"
33993"half3 __ovld __cnfn fmax(half3 x, half3 y);\n"
33994"half4 __ovld __cnfn fmax(half4 x, half4 y);\n"
33995"half8 __ovld __cnfn fmax(half8 x, half8 y);\n"
33996"half16 __ovld __cnfn fmax(half16 x, half16 y);\n"
33997"half2 __ovld __cnfn fmax(half2 x, half y);\n"
33998"half3 __ovld __cnfn fmax(half3 x, half y);\n"
33999"half4 __ovld __cnfn fmax(half4 x, half y);\n"
34000"half8 __ovld __cnfn fmax(half8 x, half y);\n"
34001"half16 __ovld __cnfn fmax(half16 x, half y);\n"
34002"#endif //cl_khr_fp16\n"
34003"\n"
34004"/**\n"
34005" * Returns y if y < x, otherwise it returns x. If one\n"
34006" * argument is a NaN, fmin() returns the other\n"
34007" * argument. If both arguments are NaNs, fmin()\n"
34008" * returns a NaN.\n"
34009" */\n"
34010"float __ovld __cnfn fmin(float x, float y);\n"
34011"float2 __ovld __cnfn fmin(float2 x, float2 y);\n"
34012"float3 __ovld __cnfn fmin(float3 x, float3 y);\n"
34013"float4 __ovld __cnfn fmin(float4 x, float4 y);\n"
34014"float8 __ovld __cnfn fmin(float8 x, float8 y);\n"
34015"float16 __ovld __cnfn fmin(float16 x, float16 y);\n"
34016"float2 __ovld __cnfn fmin(float2 x, float y);\n"
34017"float3 __ovld __cnfn fmin(float3 x, float y);\n"
34018"float4 __ovld __cnfn fmin(float4 x, float y);\n"
34019"float8 __ovld __cnfn fmin(float8 x, float y);\n"
34020"float16 __ovld __cnfn fmin(float16 x, float y);\n"
34021"#ifdef cl_khr_fp64\n"
34022"double __ovld __cnfn fmin(double x, double y);\n"
34023"double2 __ovld __cnfn fmin(double2 x, double2 y);\n"
34024"double3 __ovld __cnfn fmin(double3 x, double3 y);\n"
34025"double4 __ovld __cnfn fmin(double4 x, double4 y);\n"
34026"double8 __ovld __cnfn fmin(double8 x, double8 y);\n"
34027"double16 __ovld __cnfn fmin(double16 x, double16 y);\n"
34028"double2 __ovld __cnfn fmin(double2 x, double y);\n"
34029"double3 __ovld __cnfn fmin(double3 x, double y);\n"
34030"double4 __ovld __cnfn fmin(double4 x, double y);\n"
34031"double8 __ovld __cnfn fmin(double8 x, double y);\n"
34032"double16 __ovld __cnfn fmin(double16 x, double y);\n"
34033"#endif //cl_khr_fp64\n"
34034"#ifdef cl_khr_fp16\n"
34035"half __ovld __cnfn fmin(half x, half y);\n"
34036"half2 __ovld __cnfn fmin(half2 x, half2 y);\n"
34037"half3 __ovld __cnfn fmin(half3 x, half3 y);\n"
34038"half4 __ovld __cnfn fmin(half4 x, half4 y);\n"
34039"half8 __ovld __cnfn fmin(half8 x, half8 y);\n"
34040"half16 __ovld __cnfn fmin(half16 x, half16 y);\n"
34041"half2 __ovld __cnfn fmin(half2 x, half y);\n"
34042"half3 __ovld __cnfn fmin(half3 x, half y);\n"
34043"half4 __ovld __cnfn fmin(half4 x, half y);\n"
34044"half8 __ovld __cnfn fmin(half8 x, half y);\n"
34045"half16 __ovld __cnfn fmin(half16 x, half y);\n"
34046"#endif //cl_khr_fp16\n"
34047"\n"
34048"/**\n"
34049" * Modulus. Returns x - y * trunc (x/y).\n"
34050" */\n"
34051"float __ovld __cnfn fmod(float x, float y);\n"
34052"float2 __ovld __cnfn fmod(float2 x, float2 y);\n"
34053"float3 __ovld __cnfn fmod(float3 x, float3 y);\n"
34054"float4 __ovld __cnfn fmod(float4 x, float4 y);\n"
34055"float8 __ovld __cnfn fmod(float8 x, float8 y);\n"
34056"float16 __ovld __cnfn fmod(float16 x, float16 y);\n"
34057"#ifdef cl_khr_fp64\n"
34058"double __ovld __cnfn fmod(double x, double y);\n"
34059"double2 __ovld __cnfn fmod(double2 x, double2 y);\n"
34060"double3 __ovld __cnfn fmod(double3 x, double3 y);\n"
34061"double4 __ovld __cnfn fmod(double4 x, double4 y);\n"
34062"double8 __ovld __cnfn fmod(double8 x, double8 y);\n"
34063"double16 __ovld __cnfn fmod(double16 x, double16 y);\n"
34064"#endif //cl_khr_fp64\n"
34065"#ifdef cl_khr_fp16\n"
34066"half __ovld __cnfn fmod(half x, half y);\n"
34067"half2 __ovld __cnfn fmod(half2 x, half2 y);\n"
34068"half3 __ovld __cnfn fmod(half3 x, half3 y);\n"
34069"half4 __ovld __cnfn fmod(half4 x, half4 y);\n"
34070"half8 __ovld __cnfn fmod(half8 x, half8 y);\n"
34071"half16 __ovld __cnfn fmod(half16 x, half16 y);\n"
34072"#endif //cl_khr_fp16\n"
34073"\n"
34074"/**\n"
34075" * Returns fmin(x - floor (x), 0x1.fffffep-1f ).\n"
34076" * floor(x) is returned in iptr.\n"
34077" */\n"
34078"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34079"float __ovld fract(float x, float *iptr);\n"
34080"float2 __ovld fract(float2 x, float2 *iptr);\n"
34081"float3 __ovld fract(float3 x, float3 *iptr);\n"
34082"float4 __ovld fract(float4 x, float4 *iptr);\n"
34083"float8 __ovld fract(float8 x, float8 *iptr);\n"
34084"float16 __ovld fract(float16 x, float16 *iptr);\n"
34085"#ifdef cl_khr_fp64\n"
34086"double __ovld fract(double x, double *iptr);\n"
34087"double2 __ovld fract(double2 x, double2 *iptr);\n"
34088"double3 __ovld fract(double3 x, double3 *iptr);\n"
34089"double4 __ovld fract(double4 x, double4 *iptr);\n"
34090"double8 __ovld fract(double8 x, double8 *iptr);\n"
34091"double16 __ovld fract(double16 x, double16 *iptr);\n"
34092"#endif //cl_khr_fp64\n"
34093"#ifdef cl_khr_fp16\n"
34094"half __ovld fract(half x, half *iptr);\n"
34095"half2 __ovld fract(half2 x, half2 *iptr);\n"
34096"half3 __ovld fract(half3 x, half3 *iptr);\n"
34097"half4 __ovld fract(half4 x, half4 *iptr);\n"
34098"half8 __ovld fract(half8 x, half8 *iptr);\n"
34099"half16 __ovld fract(half16 x, half16 *iptr);\n"
34100"#endif //cl_khr_fp16\n"
34101"#else\n"
34102"float __ovld fract(float x, __global float *iptr);\n"
34103"float2 __ovld fract(float2 x, __global float2 *iptr);\n"
34104"float3 __ovld fract(float3 x, __global float3 *iptr);\n"
34105"float4 __ovld fract(float4 x, __global float4 *iptr);\n"
34106"float8 __ovld fract(float8 x, __global float8 *iptr);\n"
34107"float16 __ovld fract(float16 x, __global float16 *iptr);\n"
34108"float __ovld fract(float x, __local float *iptr);\n"
34109"float2 __ovld fract(float2 x, __local float2 *iptr);\n"
34110"float3 __ovld fract(float3 x, __local float3 *iptr);\n"
34111"float4 __ovld fract(float4 x, __local float4 *iptr);\n"
34112"float8 __ovld fract(float8 x, __local float8 *iptr);\n"
34113"float16 __ovld fract(float16 x, __local float16 *iptr);\n"
34114"float __ovld fract(float x, __private float *iptr);\n"
34115"float2 __ovld fract(float2 x, __private float2 *iptr);\n"
34116"float3 __ovld fract(float3 x, __private float3 *iptr);\n"
34117"float4 __ovld fract(float4 x, __private float4 *iptr);\n"
34118"float8 __ovld fract(float8 x, __private float8 *iptr);\n"
34119"float16 __ovld fract(float16 x, __private float16 *iptr);\n"
34120"#ifdef cl_khr_fp64\n"
34121"double __ovld fract(double x, __global double *iptr);\n"
34122"double2 __ovld fract(double2 x, __global double2 *iptr);\n"
34123"double3 __ovld fract(double3 x, __global double3 *iptr);\n"
34124"double4 __ovld fract(double4 x, __global double4 *iptr);\n"
34125"double8 __ovld fract(double8 x, __global double8 *iptr);\n"
34126"double16 __ovld fract(double16 x, __global double16 *iptr);\n"
34127"double __ovld fract(double x, __local double *iptr);\n"
34128"double2 __ovld fract(double2 x, __local double2 *iptr);\n"
34129"double3 __ovld fract(double3 x, __local double3 *iptr);\n"
34130"double4 __ovld fract(double4 x, __local double4 *iptr);\n"
34131"double8 __ovld fract(double8 x, __local double8 *iptr);\n"
34132"double16 __ovld fract(double16 x, __local double16 *iptr);\n"
34133"double __ovld fract(double x, __private double *iptr);\n"
34134"double2 __ovld fract(double2 x, __private double2 *iptr);\n"
34135"double3 __ovld fract(double3 x, __private double3 *iptr);\n"
34136"double4 __ovld fract(double4 x, __private double4 *iptr);\n"
34137"double8 __ovld fract(double8 x, __private double8 *iptr);\n"
34138"double16 __ovld fract(double16 x, __private double16 *iptr);\n"
34139"#endif //cl_khr_fp64\n"
34140"#ifdef cl_khr_fp16\n"
34141"half __ovld fract(half x, __global half *iptr);\n"
34142"half2 __ovld fract(half2 x, __global half2 *iptr);\n"
34143"half3 __ovld fract(half3 x, __global half3 *iptr);\n"
34144"half4 __ovld fract(half4 x, __global half4 *iptr);\n"
34145"half8 __ovld fract(half8 x, __global half8 *iptr);\n"
34146"half16 __ovld fract(half16 x, __global half16 *iptr);\n"
34147"half __ovld fract(half x, __local half *iptr);\n"
34148"half2 __ovld fract(half2 x, __local half2 *iptr);\n"
34149"half3 __ovld fract(half3 x, __local half3 *iptr);\n"
34150"half4 __ovld fract(half4 x, __local half4 *iptr);\n"
34151"half8 __ovld fract(half8 x, __local half8 *iptr);\n"
34152"half16 __ovld fract(half16 x, __local half16 *iptr);\n"
34153"half __ovld fract(half x, __private half *iptr);\n"
34154"half2 __ovld fract(half2 x, __private half2 *iptr);\n"
34155"half3 __ovld fract(half3 x, __private half3 *iptr);\n"
34156"half4 __ovld fract(half4 x, __private half4 *iptr);\n"
34157"half8 __ovld fract(half8 x, __private half8 *iptr);\n"
34158"half16 __ovld fract(half16 x, __private half16 *iptr);\n"
34159"#endif //cl_khr_fp16\n"
34160"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34161"\n"
34162"/**\n"
34163" * Extract mantissa and exponent from x. For each\n"
34164" * component the mantissa returned is a float with\n"
34165" * magnitude in the interval [1/2, 1) or 0. Each\n"
34166" * component of x equals mantissa returned * 2^exp.\n"
34167" */\n"
34168"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34169"float __ovld frexp(float x, int *exp);\n"
34170"float2 __ovld frexp(float2 x, int2 *exp);\n"
34171"float3 __ovld frexp(float3 x, int3 *exp);\n"
34172"float4 __ovld frexp(float4 x, int4 *exp);\n"
34173"float8 __ovld frexp(float8 x, int8 *exp);\n"
34174"float16 __ovld frexp(float16 x, int16 *exp);\n"
34175"#ifdef cl_khr_fp64\n"
34176"double __ovld frexp(double x, int *exp);\n"
34177"double2 __ovld frexp(double2 x, int2 *exp);\n"
34178"double3 __ovld frexp(double3 x, int3 *exp);\n"
34179"double4 __ovld frexp(double4 x, int4 *exp);\n"
34180"double8 __ovld frexp(double8 x, int8 *exp);\n"
34181"double16 __ovld frexp(double16 x, int16 *exp);\n"
34182"#endif //cl_khr_fp64\n"
34183"#ifdef cl_khr_fp16\n"
34184"half __ovld frexp(half x, int *exp);\n"
34185"half2 __ovld frexp(half2 x, int2 *exp);\n"
34186"half3 __ovld frexp(half3 x, int3 *exp);\n"
34187"half4 __ovld frexp(half4 x, int4 *exp);\n"
34188"half8 __ovld frexp(half8 x, int8 *exp);\n"
34189"half16 __ovld frexp(half16 x, int16 *exp);\n"
34190"#endif //cl_khr_fp16\n"
34191"#else\n"
34192"float __ovld frexp(float x, __global int *exp);\n"
34193"float2 __ovld frexp(float2 x, __global int2 *exp);\n"
34194"float3 __ovld frexp(float3 x, __global int3 *exp);\n"
34195"float4 __ovld frexp(float4 x, __global int4 *exp);\n"
34196"float8 __ovld frexp(float8 x, __global int8 *exp);\n"
34197"float16 __ovld frexp(float16 x, __global int16 *exp);\n"
34198"float __ovld frexp(float x, __local int *exp);\n"
34199"float2 __ovld frexp(float2 x, __local int2 *exp);\n"
34200"float3 __ovld frexp(float3 x, __local int3 *exp);\n"
34201"float4 __ovld frexp(float4 x, __local int4 *exp);\n"
34202"float8 __ovld frexp(float8 x, __local int8 *exp);\n"
34203"float16 __ovld frexp(float16 x, __local int16 *exp);\n"
34204"float __ovld frexp(float x, __private int *exp);\n"
34205"float2 __ovld frexp(float2 x, __private int2 *exp);\n"
34206"float3 __ovld frexp(float3 x, __private int3 *exp);\n"
34207"float4 __ovld frexp(float4 x, __private int4 *exp);\n"
34208"float8 __ovld frexp(float8 x, __private int8 *exp);\n"
34209"float16 __ovld frexp(float16 x, __private int16 *exp);\n"
34210"#ifdef cl_khr_fp64\n"
34211"double __ovld frexp(double x, __global int *exp);\n"
34212"double2 __ovld frexp(double2 x, __global int2 *exp);\n"
34213"double3 __ovld frexp(double3 x, __global int3 *exp);\n"
34214"double4 __ovld frexp(double4 x, __global int4 *exp);\n"
34215"double8 __ovld frexp(double8 x, __global int8 *exp);\n"
34216"double16 __ovld frexp(double16 x, __global int16 *exp);\n"
34217"double __ovld frexp(double x, __local int *exp);\n"
34218"double2 __ovld frexp(double2 x, __local int2 *exp);\n"
34219"double3 __ovld frexp(double3 x, __local int3 *exp);\n"
34220"double4 __ovld frexp(double4 x, __local int4 *exp);\n"
34221"double8 __ovld frexp(double8 x, __local int8 *exp);\n"
34222"double16 __ovld frexp(double16 x, __local int16 *exp);\n"
34223"double __ovld frexp(double x, __private int *exp);\n"
34224"double2 __ovld frexp(double2 x, __private int2 *exp);\n"
34225"double3 __ovld frexp(double3 x, __private int3 *exp);\n"
34226"double4 __ovld frexp(double4 x, __private int4 *exp);\n"
34227"double8 __ovld frexp(double8 x, __private int8 *exp);\n"
34228"double16 __ovld frexp(double16 x, __private int16 *exp);\n"
34229"#endif //cl_khr_fp64\n"
34230"#ifdef cl_khr_fp16\n"
34231"half __ovld frexp(half x, __global int *exp);\n"
34232"half2 __ovld frexp(half2 x, __global int2 *exp);\n"
34233"half3 __ovld frexp(half3 x, __global int3 *exp);\n"
34234"half4 __ovld frexp(half4 x, __global int4 *exp);\n"
34235"half8 __ovld frexp(half8 x, __global int8 *exp);\n"
34236"half16 __ovld frexp(half16 x, __global int16 *exp);\n"
34237"half __ovld frexp(half x, __local int *exp);\n"
34238"half2 __ovld frexp(half2 x, __local int2 *exp);\n"
34239"half3 __ovld frexp(half3 x, __local int3 *exp);\n"
34240"half4 __ovld frexp(half4 x, __local int4 *exp);\n"
34241"half8 __ovld frexp(half8 x, __local int8 *exp);\n"
34242"half16 __ovld frexp(half16 x, __local int16 *exp);\n"
34243"half __ovld frexp(half x, __private int *exp);\n"
34244"half2 __ovld frexp(half2 x, __private int2 *exp);\n"
34245"half3 __ovld frexp(half3 x, __private int3 *exp);\n"
34246"half4 __ovld frexp(half4 x, __private int4 *exp);\n"
34247"half8 __ovld frexp(half8 x, __private int8 *exp);\n"
34248"half16 __ovld frexp(half16 x, __private int16 *exp);\n"
34249"#endif //cl_khr_fp16\n"
34250"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34251"\n"
34252"/**\n"
34253" * Compute the value of the square root of x^2 + y^2\n"
34254" * without undue overflow or underflow.\n"
34255" */\n"
34256"float __ovld __cnfn hypot(float x, float y);\n"
34257"float2 __ovld __cnfn hypot(float2 x, float2 y);\n"
34258"float3 __ovld __cnfn hypot(float3 x, float3 y);\n"
34259"float4 __ovld __cnfn hypot(float4 x, float4 y);\n"
34260"float8 __ovld __cnfn hypot(float8 x, float8 y);\n"
34261"float16 __ovld __cnfn hypot(float16 x, float16 y);\n"
34262"#ifdef cl_khr_fp64\n"
34263"double __ovld __cnfn hypot(double x, double y);\n"
34264"double2 __ovld __cnfn hypot(double2 x, double2 y);\n"
34265"double3 __ovld __cnfn hypot(double3 x, double3 y);\n"
34266"double4 __ovld __cnfn hypot(double4 x, double4 y);\n"
34267"double8 __ovld __cnfn hypot(double8 x, double8 y);\n"
34268"double16 __ovld __cnfn hypot(double16 x, double16 y);\n"
34269"#endif //cl_khr_fp64\n"
34270"#ifdef cl_khr_fp16\n"
34271"half __ovld __cnfn hypot(half x, half y);\n"
34272"half2 __ovld __cnfn hypot(half2 x, half2 y);\n"
34273"half3 __ovld __cnfn hypot(half3 x, half3 y);\n"
34274"half4 __ovld __cnfn hypot(half4 x, half4 y);\n"
34275"half8 __ovld __cnfn hypot(half8 x, half8 y);\n"
34276"half16 __ovld __cnfn hypot(half16 x, half16 y);\n"
34277"#endif //cl_khr_fp16\n"
34278"\n"
34279"/**\n"
34280" * Return the exponent as an integer value.\n"
34281" */\n"
34282"int __ovld __cnfn ilogb(float x);\n"
34283"int2 __ovld __cnfn ilogb(float2 x);\n"
34284"int3 __ovld __cnfn ilogb(float3 x);\n"
34285"int4 __ovld __cnfn ilogb(float4 x);\n"
34286"int8 __ovld __cnfn ilogb(float8 x);\n"
34287"int16 __ovld __cnfn ilogb(float16 x);\n"
34288"#ifdef cl_khr_fp64\n"
34289"int __ovld __cnfn ilogb(double x);\n"
34290"int2 __ovld __cnfn ilogb(double2 x);\n"
34291"int3 __ovld __cnfn ilogb(double3 x);\n"
34292"int4 __ovld __cnfn ilogb(double4 x);\n"
34293"int8 __ovld __cnfn ilogb(double8 x);\n"
34294"int16 __ovld __cnfn ilogb(double16 x);\n"
34295"#endif //cl_khr_fp64\n"
34296"#ifdef cl_khr_fp16\n"
34297"int __ovld __cnfn ilogb(half x);\n"
34298"int2 __ovld __cnfn ilogb(half2 x);\n"
34299"int3 __ovld __cnfn ilogb(half3 x);\n"
34300"int4 __ovld __cnfn ilogb(half4 x);\n"
34301"int8 __ovld __cnfn ilogb(half8 x);\n"
34302"int16 __ovld __cnfn ilogb(half16 x);\n"
34303"#endif //cl_khr_fp16\n"
34304"\n"
34305"/**\n"
34306" * Multiply x by 2 to the power n.\n"
34307" */\n"
34308"float __ovld __cnfn ldexp(float x, int n);\n"
34309"float2 __ovld __cnfn ldexp(float2 x, int2 n);\n"
34310"float3 __ovld __cnfn ldexp(float3 x, int3 n);\n"
34311"float4 __ovld __cnfn ldexp(float4 x, int4 n);\n"
34312"float8 __ovld __cnfn ldexp(float8 x, int8 n);\n"
34313"float16 __ovld __cnfn ldexp(float16 x, int16 n);\n"
34314"float2 __ovld __cnfn ldexp(float2 x, int n);\n"
34315"float3 __ovld __cnfn ldexp(float3 x, int n);\n"
34316"float4 __ovld __cnfn ldexp(float4 x, int n);\n"
34317"float8 __ovld __cnfn ldexp(float8 x, int n);\n"
34318"float16 __ovld __cnfn ldexp(float16 x, int n);\n"
34319"#ifdef cl_khr_fp64\n"
34320"double __ovld __cnfn ldexp(double x, int n);\n"
34321"double2 __ovld __cnfn ldexp(double2 x, int2 n);\n"
34322"double3 __ovld __cnfn ldexp(double3 x, int3 n);\n"
34323"double4 __ovld __cnfn ldexp(double4 x, int4 n);\n"
34324"double8 __ovld __cnfn ldexp(double8 x, int8 n);\n"
34325"double16 __ovld __cnfn ldexp(double16 x, int16 n);\n"
34326"double2 __ovld __cnfn ldexp(double2 x, int n);\n"
34327"double3 __ovld __cnfn ldexp(double3 x, int n);\n"
34328"double4 __ovld __cnfn ldexp(double4 x, int n);\n"
34329"double8 __ovld __cnfn ldexp(double8 x, int n);\n"
34330"double16 __ovld __cnfn ldexp(double16 x, int n);\n"
34331"#endif //cl_khr_fp64\n"
34332"#ifdef cl_khr_fp16\n"
34333"half __ovld __cnfn ldexp(half x, int n);\n"
34334"half2 __ovld __cnfn ldexp(half2 x, int2 n);\n"
34335"half3 __ovld __cnfn ldexp(half3 x, int3 n);\n"
34336"half4 __ovld __cnfn ldexp(half4 x, int4 n);\n"
34337"half8 __ovld __cnfn ldexp(half8 x, int8 n);\n"
34338"half16 __ovld __cnfn ldexp(half16 x, int16 n);\n"
34339"half2 __ovld __cnfn ldexp(half2 x, int n);\n"
34340"half3 __ovld __cnfn ldexp(half3 x, int n);\n"
34341"half4 __ovld __cnfn ldexp(half4 x, int n);\n"
34342"half8 __ovld __cnfn ldexp(half8 x, int n);\n"
34343"half16 __ovld __cnfn ldexp(half16 x, int n);\n"
34344"#endif //cl_khr_fp16\n"
34345"\n"
34346"/**\n"
34347" * Log gamma function. Returns the natural\n"
34348" * logarithm of the absolute value of the gamma\n"
34349" * function. The sign of the gamma function is\n"
34350" * returned in the signp argument of lgamma_r.\n"
34351" */\n"
34352"float __ovld __cnfn lgamma(float x);\n"
34353"float2 __ovld __cnfn lgamma(float2 x);\n"
34354"float3 __ovld __cnfn lgamma(float3 x);\n"
34355"float4 __ovld __cnfn lgamma(float4 x);\n"
34356"float8 __ovld __cnfn lgamma(float8 x);\n"
34357"float16 __ovld __cnfn lgamma(float16 x);\n"
34358"#ifdef cl_khr_fp64\n"
34359"double __ovld __cnfn lgamma(double x);\n"
34360"double2 __ovld __cnfn lgamma(double2 x);\n"
34361"double3 __ovld __cnfn lgamma(double3 x);\n"
34362"double4 __ovld __cnfn lgamma(double4 x);\n"
34363"double8 __ovld __cnfn lgamma(double8 x);\n"
34364"double16 __ovld __cnfn lgamma(double16 x);\n"
34365"#endif //cl_khr_fp64\n"
34366"#ifdef cl_khr_fp16\n"
34367"half __ovld __cnfn lgamma(half x);\n"
34368"half2 __ovld __cnfn lgamma(half2 x);\n"
34369"half3 __ovld __cnfn lgamma(half3 x);\n"
34370"half4 __ovld __cnfn lgamma(half4 x);\n"
34371"half8 __ovld __cnfn lgamma(half8 x);\n"
34372"half16 __ovld __cnfn lgamma(half16 x);\n"
34373"#endif //cl_khr_fp16\n"
34374"\n"
34375"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34376"float __ovld lgamma_r(float x, int *signp);\n"
34377"float2 __ovld lgamma_r(float2 x, int2 *signp);\n"
34378"float3 __ovld lgamma_r(float3 x, int3 *signp);\n"
34379"float4 __ovld lgamma_r(float4 x, int4 *signp);\n"
34380"float8 __ovld lgamma_r(float8 x, int8 *signp);\n"
34381"float16 __ovld lgamma_r(float16 x, int16 *signp);\n"
34382"#ifdef cl_khr_fp64\n"
34383"double __ovld lgamma_r(double x, int *signp);\n"
34384"double2 __ovld lgamma_r(double2 x, int2 *signp);\n"
34385"double3 __ovld lgamma_r(double3 x, int3 *signp);\n"
34386"double4 __ovld lgamma_r(double4 x, int4 *signp);\n"
34387"double8 __ovld lgamma_r(double8 x, int8 *signp);\n"
34388"double16 __ovld lgamma_r(double16 x, int16 *signp);\n"
34389"#endif //cl_khr_fp64\n"
34390"#ifdef cl_khr_fp16\n"
34391"half __ovld lgamma_r(half x, int *signp);\n"
34392"half2 __ovld lgamma_r(half2 x, int2 *signp);\n"
34393"half3 __ovld lgamma_r(half3 x, int3 *signp);\n"
34394"half4 __ovld lgamma_r(half4 x, int4 *signp);\n"
34395"half8 __ovld lgamma_r(half8 x, int8 *signp);\n"
34396"half16 __ovld lgamma_r(half16 x, int16 *signp);\n"
34397"#endif //cl_khr_fp16\n"
34398"#else\n"
34399"float __ovld lgamma_r(float x, __global int *signp);\n"
34400"float2 __ovld lgamma_r(float2 x, __global int2 *signp);\n"
34401"float3 __ovld lgamma_r(float3 x, __global int3 *signp);\n"
34402"float4 __ovld lgamma_r(float4 x, __global int4 *signp);\n"
34403"float8 __ovld lgamma_r(float8 x, __global int8 *signp);\n"
34404"float16 __ovld lgamma_r(float16 x, __global int16 *signp);\n"
34405"float __ovld lgamma_r(float x, __local int *signp);\n"
34406"float2 __ovld lgamma_r(float2 x, __local int2 *signp);\n"
34407"float3 __ovld lgamma_r(float3 x, __local int3 *signp);\n"
34408"float4 __ovld lgamma_r(float4 x, __local int4 *signp);\n"
34409"float8 __ovld lgamma_r(float8 x, __local int8 *signp);\n"
34410"float16 __ovld lgamma_r(float16 x, __local int16 *signp);\n"
34411"float __ovld lgamma_r(float x, __private int *signp);\n"
34412"float2 __ovld lgamma_r(float2 x, __private int2 *signp);\n"
34413"float3 __ovld lgamma_r(float3 x, __private int3 *signp);\n"
34414"float4 __ovld lgamma_r(float4 x, __private int4 *signp);\n"
34415"float8 __ovld lgamma_r(float8 x, __private int8 *signp);\n"
34416"float16 __ovld lgamma_r(float16 x, __private int16 *signp);\n"
34417"#ifdef cl_khr_fp64\n"
34418"double __ovld lgamma_r(double x, __global int *signp);\n"
34419"double2 __ovld lgamma_r(double2 x, __global int2 *signp);\n"
34420"double3 __ovld lgamma_r(double3 x, __global int3 *signp);\n"
34421"double4 __ovld lgamma_r(double4 x, __global int4 *signp);\n"
34422"double8 __ovld lgamma_r(double8 x, __global int8 *signp);\n"
34423"double16 __ovld lgamma_r(double16 x, __global int16 *signp);\n"
34424"double __ovld lgamma_r(double x, __local int *signp);\n"
34425"double2 __ovld lgamma_r(double2 x, __local int2 *signp);\n"
34426"double3 __ovld lgamma_r(double3 x, __local int3 *signp);\n"
34427"double4 __ovld lgamma_r(double4 x, __local int4 *signp);\n"
34428"double8 __ovld lgamma_r(double8 x, __local int8 *signp);\n"
34429"double16 __ovld lgamma_r(double16 x, __local int16 *signp);\n"
34430"double __ovld lgamma_r(double x, __private int *signp);\n"
34431"double2 __ovld lgamma_r(double2 x, __private int2 *signp);\n"
34432"double3 __ovld lgamma_r(double3 x, __private int3 *signp);\n"
34433"double4 __ovld lgamma_r(double4 x, __private int4 *signp);\n"
34434"double8 __ovld lgamma_r(double8 x, __private int8 *signp);\n"
34435"double16 __ovld lgamma_r(double16 x, __private int16 *signp);\n"
34436"#endif //cl_khr_fp64\n"
34437"#ifdef cl_khr_fp16\n"
34438"half __ovld lgamma_r(half x, __global int *signp);\n"
34439"half2 __ovld lgamma_r(half2 x, __global int2 *signp);\n"
34440"half3 __ovld lgamma_r(half3 x, __global int3 *signp);\n"
34441"half4 __ovld lgamma_r(half4 x, __global int4 *signp);\n"
34442"half8 __ovld lgamma_r(half8 x, __global int8 *signp);\n"
34443"half16 __ovld lgamma_r(half16 x, __global int16 *signp);\n"
34444"half __ovld lgamma_r(half x, __local int *signp);\n"
34445"half2 __ovld lgamma_r(half2 x, __local int2 *signp);\n"
34446"half3 __ovld lgamma_r(half3 x, __local int3 *signp);\n"
34447"half4 __ovld lgamma_r(half4 x, __local int4 *signp);\n"
34448"half8 __ovld lgamma_r(half8 x, __local int8 *signp);\n"
34449"half16 __ovld lgamma_r(half16 x, __local int16 *signp);\n"
34450"half __ovld lgamma_r(half x, __private int *signp);\n"
34451"half2 __ovld lgamma_r(half2 x, __private int2 *signp);\n"
34452"half3 __ovld lgamma_r(half3 x, __private int3 *signp);\n"
34453"half4 __ovld lgamma_r(half4 x, __private int4 *signp);\n"
34454"half8 __ovld lgamma_r(half8 x, __private int8 *signp);\n"
34455"half16 __ovld lgamma_r(half16 x, __private int16 *signp);\n"
34456"#endif //cl_khr_fp16\n"
34457"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34458"\n"
34459"/**\n"
34460" * Compute natural logarithm.\n"
34461" */\n"
34462"float __ovld __cnfn log(float);\n"
34463"float2 __ovld __cnfn log(float2);\n"
34464"float3 __ovld __cnfn log(float3);\n"
34465"float4 __ovld __cnfn log(float4);\n"
34466"float8 __ovld __cnfn log(float8);\n"
34467"float16 __ovld __cnfn log(float16);\n"
34468"#ifdef cl_khr_fp64\n"
34469"double __ovld __cnfn log(double);\n"
34470"double2 __ovld __cnfn log(double2);\n"
34471"double3 __ovld __cnfn log(double3);\n"
34472"double4 __ovld __cnfn log(double4);\n"
34473"double8 __ovld __cnfn log(double8);\n"
34474"double16 __ovld __cnfn log(double16);\n"
34475"#endif //cl_khr_fp64\n"
34476"#ifdef cl_khr_fp16\n"
34477"half __ovld __cnfn log(half);\n"
34478"half2 __ovld __cnfn log(half2);\n"
34479"half3 __ovld __cnfn log(half3);\n"
34480"half4 __ovld __cnfn log(half4);\n"
34481"half8 __ovld __cnfn log(half8);\n"
34482"half16 __ovld __cnfn log(half16);\n"
34483"#endif //cl_khr_fp16\n"
34484"\n"
34485"/**\n"
34486" * Compute a base 2 logarithm.\n"
34487" */\n"
34488"float __ovld __cnfn log2(float);\n"
34489"float2 __ovld __cnfn log2(float2);\n"
34490"float3 __ovld __cnfn log2(float3);\n"
34491"float4 __ovld __cnfn log2(float4);\n"
34492"float8 __ovld __cnfn log2(float8);\n"
34493"float16 __ovld __cnfn log2(float16);\n"
34494"#ifdef cl_khr_fp64\n"
34495"double __ovld __cnfn log2(double);\n"
34496"double2 __ovld __cnfn log2(double2);\n"
34497"double3 __ovld __cnfn log2(double3);\n"
34498"double4 __ovld __cnfn log2(double4);\n"
34499"double8 __ovld __cnfn log2(double8);\n"
34500"double16 __ovld __cnfn log2(double16);\n"
34501"#endif //cl_khr_fp64\n"
34502"#ifdef cl_khr_fp16\n"
34503"half __ovld __cnfn log2(half);\n"
34504"half2 __ovld __cnfn log2(half2);\n"
34505"half3 __ovld __cnfn log2(half3);\n"
34506"half4 __ovld __cnfn log2(half4);\n"
34507"half8 __ovld __cnfn log2(half8);\n"
34508"half16 __ovld __cnfn log2(half16);\n"
34509"#endif //cl_khr_fp16\n"
34510"\n"
34511"/**\n"
34512" * Compute a base 10 logarithm.\n"
34513" */\n"
34514"float __ovld __cnfn log10(float);\n"
34515"float2 __ovld __cnfn log10(float2);\n"
34516"float3 __ovld __cnfn log10(float3);\n"
34517"float4 __ovld __cnfn log10(float4);\n"
34518"float8 __ovld __cnfn log10(float8);\n"
34519"float16 __ovld __cnfn log10(float16);\n"
34520"#ifdef cl_khr_fp64\n"
34521"double __ovld __cnfn log10(double);\n"
34522"double2 __ovld __cnfn log10(double2);\n"
34523"double3 __ovld __cnfn log10(double3);\n"
34524"double4 __ovld __cnfn log10(double4);\n"
34525"double8 __ovld __cnfn log10(double8);\n"
34526"double16 __ovld __cnfn log10(double16);\n"
34527"#endif //cl_khr_fp64\n"
34528"#ifdef cl_khr_fp16\n"
34529"half __ovld __cnfn log10(half);\n"
34530"half2 __ovld __cnfn log10(half2);\n"
34531"half3 __ovld __cnfn log10(half3);\n"
34532"half4 __ovld __cnfn log10(half4);\n"
34533"half8 __ovld __cnfn log10(half8);\n"
34534"half16 __ovld __cnfn log10(half16);\n"
34535"#endif //cl_khr_fp16\n"
34536"\n"
34537"/**\n"
34538" * Compute a base e logarithm of (1.0 + x).\n"
34539" */\n"
34540"float __ovld __cnfn log1p(float x);\n"
34541"float2 __ovld __cnfn log1p(float2 x);\n"
34542"float3 __ovld __cnfn log1p(float3 x);\n"
34543"float4 __ovld __cnfn log1p(float4 x);\n"
34544"float8 __ovld __cnfn log1p(float8 x);\n"
34545"float16 __ovld __cnfn log1p(float16 x);\n"
34546"#ifdef cl_khr_fp64\n"
34547"double __ovld __cnfn log1p(double x);\n"
34548"double2 __ovld __cnfn log1p(double2 x);\n"
34549"double3 __ovld __cnfn log1p(double3 x);\n"
34550"double4 __ovld __cnfn log1p(double4 x);\n"
34551"double8 __ovld __cnfn log1p(double8 x);\n"
34552"double16 __ovld __cnfn log1p(double16 x);\n"
34553"#endif //cl_khr_fp64\n"
34554"#ifdef cl_khr_fp16\n"
34555"half __ovld __cnfn log1p(half x);\n"
34556"half2 __ovld __cnfn log1p(half2 x);\n"
34557"half3 __ovld __cnfn log1p(half3 x);\n"
34558"half4 __ovld __cnfn log1p(half4 x);\n"
34559"half8 __ovld __cnfn log1p(half8 x);\n"
34560"half16 __ovld __cnfn log1p(half16 x);\n"
34561"#endif //cl_khr_fp16\n"
34562"\n"
34563"/**\n"
34564" * Compute the exponent of x, which is the integral\n"
34565" * part of logr | x |.\n"
34566" */\n"
34567"float __ovld __cnfn logb(float x);\n"
34568"float2 __ovld __cnfn logb(float2 x);\n"
34569"float3 __ovld __cnfn logb(float3 x);\n"
34570"float4 __ovld __cnfn logb(float4 x);\n"
34571"float8 __ovld __cnfn logb(float8 x);\n"
34572"float16 __ovld __cnfn logb(float16 x);\n"
34573"#ifdef cl_khr_fp64\n"
34574"double __ovld __cnfn logb(double x);\n"
34575"double2 __ovld __cnfn logb(double2 x);\n"
34576"double3 __ovld __cnfn logb(double3 x);\n"
34577"double4 __ovld __cnfn logb(double4 x);\n"
34578"double8 __ovld __cnfn logb(double8 x);\n"
34579"double16 __ovld __cnfn logb(double16 x);\n"
34580"#endif //cl_khr_fp64\n"
34581"#ifdef cl_khr_fp16\n"
34582"half __ovld __cnfn logb(half x);\n"
34583"half2 __ovld __cnfn logb(half2 x);\n"
34584"half3 __ovld __cnfn logb(half3 x);\n"
34585"half4 __ovld __cnfn logb(half4 x);\n"
34586"half8 __ovld __cnfn logb(half8 x);\n"
34587"half16 __ovld __cnfn logb(half16 x);\n"
34588"#endif //cl_khr_fp16\n"
34589"\n"
34590"/**\n"
34591" * mad approximates a * b + c. Whether or how the\n"
34592" * product of a * b is rounded and how supernormal or\n"
34593" * subnormal intermediate products are handled is not\n"
34594" * defined. mad is intended to be used where speed is\n"
34595" * preferred over accuracy.\n"
34596" */\n"
34597"float __ovld __cnfn mad(float a, float b, float c);\n"
34598"float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);\n"
34599"float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);\n"
34600"float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);\n"
34601"float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);\n"
34602"float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);\n"
34603"#ifdef cl_khr_fp64\n"
34604"double __ovld __cnfn mad(double a, double b, double c);\n"
34605"double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);\n"
34606"double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);\n"
34607"double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);\n"
34608"double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);\n"
34609"double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);\n"
34610"#endif //cl_khr_fp64\n"
34611"#ifdef cl_khr_fp16\n"
34612"half __ovld __cnfn mad(half a, half b, half c);\n"
34613"half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);\n"
34614"half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);\n"
34615"half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);\n"
34616"half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);\n"
34617"half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);\n"
34618"#endif //cl_khr_fp16\n"
34619"\n"
34620"/**\n"
34621" * Returns x if | x | > | y |, y if | y | > | x |, otherwise\n"
34622" * fmax(x, y).\n"
34623" */\n"
34624"float __ovld __cnfn maxmag(float x, float y);\n"
34625"float2 __ovld __cnfn maxmag(float2 x, float2 y);\n"
34626"float3 __ovld __cnfn maxmag(float3 x, float3 y);\n"
34627"float4 __ovld __cnfn maxmag(float4 x, float4 y);\n"
34628"float8 __ovld __cnfn maxmag(float8 x, float8 y);\n"
34629"float16 __ovld __cnfn maxmag(float16 x, float16 y);\n"
34630"#ifdef cl_khr_fp64\n"
34631"double __ovld __cnfn maxmag(double x, double y);\n"
34632"double2 __ovld __cnfn maxmag(double2 x, double2 y);\n"
34633"double3 __ovld __cnfn maxmag(double3 x, double3 y);\n"
34634"double4 __ovld __cnfn maxmag(double4 x, double4 y);\n"
34635"double8 __ovld __cnfn maxmag(double8 x, double8 y);\n"
34636"double16 __ovld __cnfn maxmag(double16 x, double16 y);\n"
34637"#endif //cl_khr_fp64\n"
34638"#ifdef cl_khr_fp16\n"
34639"half __ovld __cnfn maxmag(half x, half y);\n"
34640"half2 __ovld __cnfn maxmag(half2 x, half2 y);\n"
34641"half3 __ovld __cnfn maxmag(half3 x, half3 y);\n"
34642"half4 __ovld __cnfn maxmag(half4 x, half4 y);\n"
34643"half8 __ovld __cnfn maxmag(half8 x, half8 y);\n"
34644"half16 __ovld __cnfn maxmag(half16 x, half16 y);\n"
34645"#endif //cl_khr_fp16\n"
34646"\n"
34647"/**\n"
34648" * Returns x if | x | < | y |, y if | y | < | x |, otherwise\n"
34649" * fmin(x, y).\n"
34650" */\n"
34651"float __ovld __cnfn minmag(float x, float y);\n"
34652"float2 __ovld __cnfn minmag(float2 x, float2 y);\n"
34653"float3 __ovld __cnfn minmag(float3 x, float3 y);\n"
34654"float4 __ovld __cnfn minmag(float4 x, float4 y);\n"
34655"float8 __ovld __cnfn minmag(float8 x, float8 y);\n"
34656"float16 __ovld __cnfn minmag(float16 x, float16 y);\n"
34657"#ifdef cl_khr_fp64\n"
34658"double __ovld __cnfn minmag(double x, double y);\n"
34659"double2 __ovld __cnfn minmag(double2 x, double2 y);\n"
34660"double3 __ovld __cnfn minmag(double3 x, double3 y);\n"
34661"double4 __ovld __cnfn minmag(double4 x, double4 y);\n"
34662"double8 __ovld __cnfn minmag(double8 x, double8 y);\n"
34663"double16 __ovld __cnfn minmag(double16 x, double16 y);\n"
34664"#endif //cl_khr_fp64\n"
34665"#ifdef cl_khr_fp16\n"
34666"half __ovld __cnfn minmag(half x, half y);\n"
34667"half2 __ovld __cnfn minmag(half2 x, half2 y);\n"
34668"half3 __ovld __cnfn minmag(half3 x, half3 y);\n"
34669"half4 __ovld __cnfn minmag(half4 x, half4 y);\n"
34670"half8 __ovld __cnfn minmag(half8 x, half8 y);\n"
34671"half16 __ovld __cnfn minmag(half16 x, half16 y);\n"
34672"#endif //cl_khr_fp16\n"
34673"\n"
34674"/**\n"
34675" * Decompose a floating-point number. The modf\n"
34676" * function breaks the argument x into integral and\n"
34677" * fractional parts, each of which has the same sign as\n"
34678" * the argument. It stores the integral part in the object\n"
34679" * pointed to by iptr.\n"
34680" */\n"
34681"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34682"float __ovld modf(float x, float *iptr);\n"
34683"float2 __ovld modf(float2 x, float2 *iptr);\n"
34684"float3 __ovld modf(float3 x, float3 *iptr);\n"
34685"float4 __ovld modf(float4 x, float4 *iptr);\n"
34686"float8 __ovld modf(float8 x, float8 *iptr);\n"
34687"float16 __ovld modf(float16 x, float16 *iptr);\n"
34688"#ifdef cl_khr_fp64\n"
34689"double __ovld modf(double x, double *iptr);\n"
34690"double2 __ovld modf(double2 x, double2 *iptr);\n"
34691"double3 __ovld modf(double3 x, double3 *iptr);\n"
34692"double4 __ovld modf(double4 x, double4 *iptr);\n"
34693"double8 __ovld modf(double8 x, double8 *iptr);\n"
34694"double16 __ovld modf(double16 x, double16 *iptr);\n"
34695"#endif //cl_khr_fp64\n"
34696"#ifdef cl_khr_fp16\n"
34697"half __ovld modf(half x, half *iptr);\n"
34698"half2 __ovld modf(half2 x, half2 *iptr);\n"
34699"half3 __ovld modf(half3 x, half3 *iptr);\n"
34700"half4 __ovld modf(half4 x, half4 *iptr);\n"
34701"half8 __ovld modf(half8 x, half8 *iptr);\n"
34702"half16 __ovld modf(half16 x, half16 *iptr);\n"
34703"#endif //cl_khr_fp16\n"
34704"#else\n"
34705"float __ovld modf(float x, __global float *iptr);\n"
34706"float2 __ovld modf(float2 x, __global float2 *iptr);\n"
34707"float3 __ovld modf(float3 x, __global float3 *iptr);\n"
34708"float4 __ovld modf(float4 x, __global float4 *iptr);\n"
34709"float8 __ovld modf(float8 x, __global float8 *iptr);\n"
34710"float16 __ovld modf(float16 x, __global float16 *iptr);\n"
34711"float __ovld modf(float x, __local float *iptr);\n"
34712"float2 __ovld modf(float2 x, __local float2 *iptr);\n"
34713"float3 __ovld modf(float3 x, __local float3 *iptr);\n"
34714"float4 __ovld modf(float4 x, __local float4 *iptr);\n"
34715"float8 __ovld modf(float8 x, __local float8 *iptr);\n"
34716"float16 __ovld modf(float16 x, __local float16 *iptr);\n"
34717"float __ovld modf(float x, __private float *iptr);\n"
34718"float2 __ovld modf(float2 x, __private float2 *iptr);\n"
34719"float3 __ovld modf(float3 x, __private float3 *iptr);\n"
34720"float4 __ovld modf(float4 x, __private float4 *iptr);\n"
34721"float8 __ovld modf(float8 x, __private float8 *iptr);\n"
34722"float16 __ovld modf(float16 x, __private float16 *iptr);\n"
34723"#ifdef cl_khr_fp64\n"
34724"double __ovld modf(double x, __global double *iptr);\n"
34725"double2 __ovld modf(double2 x, __global double2 *iptr);\n"
34726"double3 __ovld modf(double3 x, __global double3 *iptr);\n"
34727"double4 __ovld modf(double4 x, __global double4 *iptr);\n"
34728"double8 __ovld modf(double8 x, __global double8 *iptr);\n"
34729"double16 __ovld modf(double16 x, __global double16 *iptr);\n"
34730"double __ovld modf(double x, __local double *iptr);\n"
34731"double2 __ovld modf(double2 x, __local double2 *iptr);\n"
34732"double3 __ovld modf(double3 x, __local double3 *iptr);\n"
34733"double4 __ovld modf(double4 x, __local double4 *iptr);\n"
34734"double8 __ovld modf(double8 x, __local double8 *iptr);\n"
34735"double16 __ovld modf(double16 x, __local double16 *iptr);\n"
34736"double __ovld modf(double x, __private double *iptr);\n"
34737"double2 __ovld modf(double2 x, __private double2 *iptr);\n"
34738"double3 __ovld modf(double3 x, __private double3 *iptr);\n"
34739"double4 __ovld modf(double4 x, __private double4 *iptr);\n"
34740"double8 __ovld modf(double8 x, __private double8 *iptr);\n"
34741"double16 __ovld modf(double16 x, __private double16 *iptr);\n"
34742"#endif //cl_khr_fp64\n"
34743"#ifdef cl_khr_fp16\n"
34744"half __ovld modf(half x, __global half *iptr);\n"
34745"half2 __ovld modf(half2 x, __global half2 *iptr);\n"
34746"half3 __ovld modf(half3 x, __global half3 *iptr);\n"
34747"half4 __ovld modf(half4 x, __global half4 *iptr);\n"
34748"half8 __ovld modf(half8 x, __global half8 *iptr);\n"
34749"half16 __ovld modf(half16 x, __global half16 *iptr);\n"
34750"half __ovld modf(half x, __local half *iptr);\n"
34751"half2 __ovld modf(half2 x, __local half2 *iptr);\n"
34752"half3 __ovld modf(half3 x, __local half3 *iptr);\n"
34753"half4 __ovld modf(half4 x, __local half4 *iptr);\n"
34754"half8 __ovld modf(half8 x, __local half8 *iptr);\n"
34755"half16 __ovld modf(half16 x, __local half16 *iptr);\n"
34756"half __ovld modf(half x, __private half *iptr);\n"
34757"half2 __ovld modf(half2 x, __private half2 *iptr);\n"
34758"half3 __ovld modf(half3 x, __private half3 *iptr);\n"
34759"half4 __ovld modf(half4 x, __private half4 *iptr);\n"
34760"half8 __ovld modf(half8 x, __private half8 *iptr);\n"
34761"half16 __ovld modf(half16 x, __private half16 *iptr);\n"
34762"#endif //cl_khr_fp16\n"
34763"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34764"\n"
34765"/**\n"
34766" * Returns a quiet NaN. The nancode may be placed\n"
34767" * in the significand of the resulting NaN.\n"
34768" */\n"
34769"float __ovld __cnfn nan(uint nancode);\n"
34770"float2 __ovld __cnfn nan(uint2 nancode);\n"
34771"float3 __ovld __cnfn nan(uint3 nancode);\n"
34772"float4 __ovld __cnfn nan(uint4 nancode);\n"
34773"float8 __ovld __cnfn nan(uint8 nancode);\n"
34774"float16 __ovld __cnfn nan(uint16 nancode);\n"
34775"#ifdef cl_khr_fp64\n"
34776"double __ovld __cnfn nan(ulong nancode);\n"
34777"double2 __ovld __cnfn nan(ulong2 nancode);\n"
34778"double3 __ovld __cnfn nan(ulong3 nancode);\n"
34779"double4 __ovld __cnfn nan(ulong4 nancode);\n"
34780"double8 __ovld __cnfn nan(ulong8 nancode);\n"
34781"double16 __ovld __cnfn nan(ulong16 nancode);\n"
34782"#endif //cl_khr_fp64\n"
34783"#ifdef cl_khr_fp16\n"
34784"half __ovld __cnfn nan(ushort nancode);\n"
34785"half2 __ovld __cnfn nan(ushort2 nancode);\n"
34786"half3 __ovld __cnfn nan(ushort3 nancode);\n"
34787"half4 __ovld __cnfn nan(ushort4 nancode);\n"
34788"half8 __ovld __cnfn nan(ushort8 nancode);\n"
34789"half16 __ovld __cnfn nan(ushort16 nancode);\n"
34790"#endif //cl_khr_fp16\n"
34791"\n"
34792"/**\n"
34793" * Computes the next representable single-precision\n"
34794" * floating-point value following x in the direction of\n"
34795" * y. Thus, if y is less than x, nextafter() returns the\n"
34796" * largest representable floating-point number less\n"
34797" * than x.\n"
34798" */\n"
34799"float __ovld __cnfn nextafter(float x, float y);\n"
34800"float2 __ovld __cnfn nextafter(float2 x, float2 y);\n"
34801"float3 __ovld __cnfn nextafter(float3 x, float3 y);\n"
34802"float4 __ovld __cnfn nextafter(float4 x, float4 y);\n"
34803"float8 __ovld __cnfn nextafter(float8 x, float8 y);\n"
34804"float16 __ovld __cnfn nextafter(float16 x, float16 y);\n"
34805"#ifdef cl_khr_fp64\n"
34806"double __ovld __cnfn nextafter(double x, double y);\n"
34807"double2 __ovld __cnfn nextafter(double2 x, double2 y);\n"
34808"double3 __ovld __cnfn nextafter(double3 x, double3 y);\n"
34809"double4 __ovld __cnfn nextafter(double4 x, double4 y);\n"
34810"double8 __ovld __cnfn nextafter(double8 x, double8 y);\n"
34811"double16 __ovld __cnfn nextafter(double16 x, double16 y);\n"
34812"#endif //cl_khr_fp64\n"
34813"#ifdef cl_khr_fp16\n"
34814"half __ovld __cnfn nextafter(half x, half y);\n"
34815"half2 __ovld __cnfn nextafter(half2 x, half2 y);\n"
34816"half3 __ovld __cnfn nextafter(half3 x, half3 y);\n"
34817"half4 __ovld __cnfn nextafter(half4 x, half4 y);\n"
34818"half8 __ovld __cnfn nextafter(half8 x, half8 y);\n"
34819"half16 __ovld __cnfn nextafter(half16 x, half16 y);\n"
34820"#endif //cl_khr_fp16\n"
34821"\n"
34822"/**\n"
34823" * Compute x to the power y.\n"
34824" */\n"
34825"float __ovld __cnfn pow(float x, float y);\n"
34826"float2 __ovld __cnfn pow(float2 x, float2 y);\n"
34827"float3 __ovld __cnfn pow(float3 x, float3 y);\n"
34828"float4 __ovld __cnfn pow(float4 x, float4 y);\n"
34829"float8 __ovld __cnfn pow(float8 x, float8 y);\n"
34830"float16 __ovld __cnfn pow(float16 x, float16 y);\n"
34831"#ifdef cl_khr_fp64\n"
34832"double __ovld __cnfn pow(double x, double y);\n"
34833"double2 __ovld __cnfn pow(double2 x, double2 y);\n"
34834"double3 __ovld __cnfn pow(double3 x, double3 y);\n"
34835"double4 __ovld __cnfn pow(double4 x, double4 y);\n"
34836"double8 __ovld __cnfn pow(double8 x, double8 y);\n"
34837"double16 __ovld __cnfn pow(double16 x, double16 y);\n"
34838"#endif //cl_khr_fp64\n"
34839"#ifdef cl_khr_fp16\n"
34840"half __ovld __cnfn pow(half x, half y);\n"
34841"half2 __ovld __cnfn pow(half2 x, half2 y);\n"
34842"half3 __ovld __cnfn pow(half3 x, half3 y);\n"
34843"half4 __ovld __cnfn pow(half4 x, half4 y);\n"
34844"half8 __ovld __cnfn pow(half8 x, half8 y);\n"
34845"half16 __ovld __cnfn pow(half16 x, half16 y);\n"
34846"#endif //cl_khr_fp16\n"
34847"\n"
34848"/**\n"
34849" * Compute x to the power y, where y is an integer.\n"
34850" */\n"
34851"float __ovld __cnfn pown(float x, int y);\n"
34852"float2 __ovld __cnfn pown(float2 x, int2 y);\n"
34853"float3 __ovld __cnfn pown(float3 x, int3 y);\n"
34854"float4 __ovld __cnfn pown(float4 x, int4 y);\n"
34855"float8 __ovld __cnfn pown(float8 x, int8 y);\n"
34856"float16 __ovld __cnfn pown(float16 x, int16 y);\n"
34857"#ifdef cl_khr_fp64\n"
34858"double __ovld __cnfn pown(double x, int y);\n"
34859"double2 __ovld __cnfn pown(double2 x, int2 y);\n"
34860"double3 __ovld __cnfn pown(double3 x, int3 y);\n"
34861"double4 __ovld __cnfn pown(double4 x, int4 y);\n"
34862"double8 __ovld __cnfn pown(double8 x, int8 y);\n"
34863"double16 __ovld __cnfn pown(double16 x, int16 y);\n"
34864"#endif //cl_khr_fp64\n"
34865"#ifdef cl_khr_fp16\n"
34866"half __ovld __cnfn pown(half x, int y);\n"
34867"half2 __ovld __cnfn pown(half2 x, int2 y);\n"
34868"half3 __ovld __cnfn pown(half3 x, int3 y);\n"
34869"half4 __ovld __cnfn pown(half4 x, int4 y);\n"
34870"half8 __ovld __cnfn pown(half8 x, int8 y);\n"
34871"half16 __ovld __cnfn pown(half16 x, int16 y);\n"
34872"#endif //cl_khr_fp16\n"
34873"\n"
34874"/**\n"
34875" * Compute x to the power y, where x is >= 0.\n"
34876" */\n"
34877"float __ovld __cnfn powr(float x, float y);\n"
34878"float2 __ovld __cnfn powr(float2 x, float2 y);\n"
34879"float3 __ovld __cnfn powr(float3 x, float3 y);\n"
34880"float4 __ovld __cnfn powr(float4 x, float4 y);\n"
34881"float8 __ovld __cnfn powr(float8 x, float8 y);\n"
34882"float16 __ovld __cnfn powr(float16 x, float16 y);\n"
34883"#ifdef cl_khr_fp64\n"
34884"double __ovld __cnfn powr(double x, double y);\n"
34885"double2 __ovld __cnfn powr(double2 x, double2 y);\n"
34886"double3 __ovld __cnfn powr(double3 x, double3 y);\n"
34887"double4 __ovld __cnfn powr(double4 x, double4 y);\n"
34888"double8 __ovld __cnfn powr(double8 x, double8 y);\n"
34889"double16 __ovld __cnfn powr(double16 x, double16 y);\n"
34890"#endif //cl_khr_fp64\n"
34891"#ifdef cl_khr_fp16\n"
34892"half __ovld __cnfn powr(half x, half y);\n"
34893"half2 __ovld __cnfn powr(half2 x, half2 y);\n"
34894"half3 __ovld __cnfn powr(half3 x, half3 y);\n"
34895"half4 __ovld __cnfn powr(half4 x, half4 y);\n"
34896"half8 __ovld __cnfn powr(half8 x, half8 y);\n"
34897"half16 __ovld __cnfn powr(half16 x, half16 y);\n"
34898"#endif //cl_khr_fp16\n"
34899"\n"
34900"/**\n"
34901" * Compute the value r such that r = x - n*y, where n\n"
34902" * is the integer nearest the exact value of x/y. If there\n"
34903" * are two integers closest to x/y, n shall be the even\n"
34904" * one. If r is zero, it is given the same sign as x.\n"
34905" */\n"
34906"float __ovld __cnfn remainder(float x, float y);\n"
34907"float2 __ovld __cnfn remainder(float2 x, float2 y);\n"
34908"float3 __ovld __cnfn remainder(float3 x, float3 y);\n"
34909"float4 __ovld __cnfn remainder(float4 x, float4 y);\n"
34910"float8 __ovld __cnfn remainder(float8 x, float8 y);\n"
34911"float16 __ovld __cnfn remainder(float16 x, float16 y);\n"
34912"#ifdef cl_khr_fp64\n"
34913"double __ovld __cnfn remainder(double x, double y);\n"
34914"double2 __ovld __cnfn remainder(double2 x, double2 y);\n"
34915"double3 __ovld __cnfn remainder(double3 x, double3 y);\n"
34916"double4 __ovld __cnfn remainder(double4 x, double4 y);\n"
34917"double8 __ovld __cnfn remainder(double8 x, double8 y);\n"
34918"double16 __ovld __cnfn remainder(double16 x, double16 y);\n"
34919"#endif //cl_khr_fp64\n"
34920"#ifdef cl_khr_fp16\n"
34921"half __ovld __cnfn remainder(half x, half y);\n"
34922"half2 __ovld __cnfn remainder(half2 x, half2 y);\n"
34923"half3 __ovld __cnfn remainder(half3 x, half3 y);\n"
34924"half4 __ovld __cnfn remainder(half4 x, half4 y);\n"
34925"half8 __ovld __cnfn remainder(half8 x, half8 y);\n"
34926"half16 __ovld __cnfn remainder(half16 x, half16 y);\n"
34927"#endif //cl_khr_fp16\n"
34928"\n"
34929"/**\n"
34930" * The remquo function computes the value r such\n"
34931" * that r = x - n*y, where n is the integer nearest the\n"
34932" * exact value of x/y. If there are two integers closest\n"
34933" * to x/y, n shall be the even one. If r is zero, it is\n"
34934" * given the same sign as x. This is the same value\n"
34935" * that is returned by the remainder function.\n"
34936" * remquo also calculates the lower seven bits of the\n"
34937" * integral quotient x/y, and gives that value the same\n"
34938" * sign as x/y. It stores this signed value in the object\n"
34939" * pointed to by quo.\n"
34940" */\n"
34941"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
34942"float __ovld remquo(float x, float y, int *quo);\n"
34943"float2 __ovld remquo(float2 x, float2 y, int2 *quo);\n"
34944"float3 __ovld remquo(float3 x, float3 y, int3 *quo);\n"
34945"float4 __ovld remquo(float4 x, float4 y, int4 *quo);\n"
34946"float8 __ovld remquo(float8 x, float8 y, int8 *quo);\n"
34947"float16 __ovld remquo(float16 x, float16 y, int16 *quo);\n"
34948"#ifdef cl_khr_fp64\n"
34949"double __ovld remquo(double x, double y, int *quo);\n"
34950"double2 __ovld remquo(double2 x, double2 y, int2 *quo);\n"
34951"double3 __ovld remquo(double3 x, double3 y, int3 *quo);\n"
34952"double4 __ovld remquo(double4 x, double4 y, int4 *quo);\n"
34953"double8 __ovld remquo(double8 x, double8 y, int8 *quo);\n"
34954"double16 __ovld remquo(double16 x, double16 y, int16 *quo);\n"
34955"#endif //cl_khr_fp64\n"
34956"#ifdef cl_khr_fp16\n"
34957"half __ovld remquo(half x, half y, int *quo);\n"
34958"half2 __ovld remquo(half2 x, half2 y, int2 *quo);\n"
34959"half3 __ovld remquo(half3 x, half3 y, int3 *quo);\n"
34960"half4 __ovld remquo(half4 x, half4 y, int4 *quo);\n"
34961"half8 __ovld remquo(half8 x, half8 y, int8 *quo);\n"
34962"half16 __ovld remquo(half16 x, half16 y, int16 *quo);\n"
34963"\n"
34964"#endif //cl_khr_fp16\n"
34965"#else\n"
34966"float __ovld remquo(float x, float y, __global int *quo);\n"
34967"float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);\n"
34968"float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);\n"
34969"float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);\n"
34970"float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);\n"
34971"float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);\n"
34972"float __ovld remquo(float x, float y, __local int *quo);\n"
34973"float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);\n"
34974"float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);\n"
34975"float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);\n"
34976"float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);\n"
34977"float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);\n"
34978"float __ovld remquo(float x, float y, __private int *quo);\n"
34979"float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);\n"
34980"float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);\n"
34981"float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);\n"
34982"float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);\n"
34983"float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);\n"
34984"#ifdef cl_khr_fp64\n"
34985"double __ovld remquo(double x, double y, __global int *quo);\n"
34986"double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);\n"
34987"double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);\n"
34988"double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);\n"
34989"double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);\n"
34990"double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);\n"
34991"double __ovld remquo(double x, double y, __local int *quo);\n"
34992"double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);\n"
34993"double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);\n"
34994"double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);\n"
34995"double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);\n"
34996"double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);\n"
34997"double __ovld remquo(double x, double y, __private int *quo);\n"
34998"double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);\n"
34999"double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);\n"
35000"double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);\n"
35001"double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);\n"
35002"double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);\n"
35003"#endif //cl_khr_fp64\n"
35004"#ifdef cl_khr_fp16\n"
35005"half __ovld remquo(half x, half y, __global int *quo);\n"
35006"half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);\n"
35007"half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);\n"
35008"half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);\n"
35009"half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);\n"
35010"half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);\n"
35011"half __ovld remquo(half x, half y, __local int *quo);\n"
35012"half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);\n"
35013"half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);\n"
35014"half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);\n"
35015"half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);\n"
35016"half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);\n"
35017"half __ovld remquo(half x, half y, __private int *quo);\n"
35018"half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);\n"
35019"half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);\n"
35020"half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);\n"
35021"half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);\n"
35022"half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);\n"
35023"#endif //cl_khr_fp16\n"
35024"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35025"/**\n"
35026" * Round to integral value (using round to nearest\n"
35027" * even rounding mode) in floating-point format.\n"
35028" * Refer to section 7.1 for description of rounding\n"
35029" * modes.\n"
35030" */\n"
35031"float __ovld __cnfn rint(float);\n"
35032"float2 __ovld __cnfn rint(float2);\n"
35033"float3 __ovld __cnfn rint(float3);\n"
35034"float4 __ovld __cnfn rint(float4);\n"
35035"float8 __ovld __cnfn rint(float8);\n"
35036"float16 __ovld __cnfn rint(float16);\n"
35037"#ifdef cl_khr_fp64\n"
35038"double __ovld __cnfn rint(double);\n"
35039"double2 __ovld __cnfn rint(double2);\n"
35040"double3 __ovld __cnfn rint(double3);\n"
35041"double4 __ovld __cnfn rint(double4);\n"
35042"double8 __ovld __cnfn rint(double8);\n"
35043"double16 __ovld __cnfn rint(double16);\n"
35044"#endif //cl_khr_fp64\n"
35045"#ifdef cl_khr_fp16\n"
35046"half __ovld __cnfn rint(half);\n"
35047"half2 __ovld __cnfn rint(half2);\n"
35048"half3 __ovld __cnfn rint(half3);\n"
35049"half4 __ovld __cnfn rint(half4);\n"
35050"half8 __ovld __cnfn rint(half8);\n"
35051"half16 __ovld __cnfn rint(half16);\n"
35052"#endif //cl_khr_fp16\n"
35053"\n"
35054"/**\n"
35055" * Compute x to the power 1/y.\n"
35056" */\n"
35057"float __ovld __cnfn rootn(float x, int y);\n"
35058"float2 __ovld __cnfn rootn(float2 x, int2 y);\n"
35059"float3 __ovld __cnfn rootn(float3 x, int3 y);\n"
35060"float4 __ovld __cnfn rootn(float4 x, int4 y);\n"
35061"float8 __ovld __cnfn rootn(float8 x, int8 y);\n"
35062"float16 __ovld __cnfn rootn(float16 x, int16 y);\n"
35063"#ifdef cl_khr_fp64\n"
35064"double __ovld __cnfn rootn(double x, int y);\n"
35065"double2 __ovld __cnfn rootn(double2 x, int2 y);\n"
35066"double3 __ovld __cnfn rootn(double3 x, int3 y);\n"
35067"double4 __ovld __cnfn rootn(double4 x, int4 y);\n"
35068"double8 __ovld __cnfn rootn(double8 x, int8 y);\n"
35069"double16 __ovld __cnfn rootn(double16 x, int16 y);\n"
35070"#endif //cl_khr_fp64\n"
35071"#ifdef cl_khr_fp16\n"
35072"half __ovld __cnfn rootn(half x, int y);\n"
35073"half2 __ovld __cnfn rootn(half2 x, int2 y);\n"
35074"half3 __ovld __cnfn rootn(half3 x, int3 y);\n"
35075"half4 __ovld __cnfn rootn(half4 x, int4 y);\n"
35076"half8 __ovld __cnfn rootn(half8 x, int8 y);\n"
35077"half16 __ovld __cnfn rootn(half16 x, int16 y);\n"
35078"#endif //cl_khr_fp16\n"
35079"\n"
35080"/**\n"
35081" * Return the integral value nearest to x rounding\n"
35082" * halfway cases away from zero, regardless of the\n"
35083" * current rounding direction.\n"
35084" */\n"
35085"float __ovld __cnfn round(float x);\n"
35086"float2 __ovld __cnfn round(float2 x);\n"
35087"float3 __ovld __cnfn round(float3 x);\n"
35088"float4 __ovld __cnfn round(float4 x);\n"
35089"float8 __ovld __cnfn round(float8 x);\n"
35090"float16 __ovld __cnfn round(float16 x);\n"
35091"#ifdef cl_khr_fp64\n"
35092"double __ovld __cnfn round(double x);\n"
35093"double2 __ovld __cnfn round(double2 x);\n"
35094"double3 __ovld __cnfn round(double3 x);\n"
35095"double4 __ovld __cnfn round(double4 x);\n"
35096"double8 __ovld __cnfn round(double8 x);\n"
35097"double16 __ovld __cnfn round(double16 x);\n"
35098"#endif //cl_khr_fp64\n"
35099"#ifdef cl_khr_fp16\n"
35100"half __ovld __cnfn round(half x);\n"
35101"half2 __ovld __cnfn round(half2 x);\n"
35102"half3 __ovld __cnfn round(half3 x);\n"
35103"half4 __ovld __cnfn round(half4 x);\n"
35104"half8 __ovld __cnfn round(half8 x);\n"
35105"half16 __ovld __cnfn round(half16 x);\n"
35106"#endif //cl_khr_fp16\n"
35107"\n"
35108"/**\n"
35109" * Compute inverse square root.\n"
35110" */\n"
35111"float __ovld __cnfn rsqrt(float);\n"
35112"float2 __ovld __cnfn rsqrt(float2);\n"
35113"float3 __ovld __cnfn rsqrt(float3);\n"
35114"float4 __ovld __cnfn rsqrt(float4);\n"
35115"float8 __ovld __cnfn rsqrt(float8);\n"
35116"float16 __ovld __cnfn rsqrt(float16);\n"
35117"#ifdef cl_khr_fp64\n"
35118"double __ovld __cnfn rsqrt(double);\n"
35119"double2 __ovld __cnfn rsqrt(double2);\n"
35120"double3 __ovld __cnfn rsqrt(double3);\n"
35121"double4 __ovld __cnfn rsqrt(double4);\n"
35122"double8 __ovld __cnfn rsqrt(double8);\n"
35123"double16 __ovld __cnfn rsqrt(double16);\n"
35124"#endif //cl_khr_fp64\n"
35125"#ifdef cl_khr_fp16\n"
35126"half __ovld __cnfn rsqrt(half);\n"
35127"half2 __ovld __cnfn rsqrt(half2);\n"
35128"half3 __ovld __cnfn rsqrt(half3);\n"
35129"half4 __ovld __cnfn rsqrt(half4);\n"
35130"half8 __ovld __cnfn rsqrt(half8);\n"
35131"half16 __ovld __cnfn rsqrt(half16);\n"
35132"#endif //cl_khr_fp16\n"
35133"\n"
35134"/**\n"
35135" * Compute sine.\n"
35136" */\n"
35137"float __ovld __cnfn sin(float);\n"
35138"float2 __ovld __cnfn sin(float2);\n"
35139"float3 __ovld __cnfn sin(float3);\n"
35140"float4 __ovld __cnfn sin(float4);\n"
35141"float8 __ovld __cnfn sin(float8);\n"
35142"float16 __ovld __cnfn sin(float16);\n"
35143"#ifdef cl_khr_fp64\n"
35144"double __ovld __cnfn sin(double);\n"
35145"double2 __ovld __cnfn sin(double2);\n"
35146"double3 __ovld __cnfn sin(double3);\n"
35147"double4 __ovld __cnfn sin(double4);\n"
35148"double8 __ovld __cnfn sin(double8);\n"
35149"double16 __ovld __cnfn sin(double16);\n"
35150"#endif //cl_khr_fp64\n"
35151"#ifdef cl_khr_fp16\n"
35152"half __ovld __cnfn sin(half);\n"
35153"half2 __ovld __cnfn sin(half2);\n"
35154"half3 __ovld __cnfn sin(half3);\n"
35155"half4 __ovld __cnfn sin(half4);\n"
35156"half8 __ovld __cnfn sin(half8);\n"
35157"half16 __ovld __cnfn sin(half16);\n"
35158"#endif //cl_khr_fp16\n"
35159"\n"
35160"/**\n"
35161" * Compute sine and cosine of x. The computed sine\n"
35162" * is the return value and computed cosine is returned\n"
35163" * in cosval.\n"
35164" */\n"
35165"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35166"float __ovld sincos(float x, float *cosval);\n"
35167"float2 __ovld sincos(float2 x, float2 *cosval);\n"
35168"float3 __ovld sincos(float3 x, float3 *cosval);\n"
35169"float4 __ovld sincos(float4 x, float4 *cosval);\n"
35170"float8 __ovld sincos(float8 x, float8 *cosval);\n"
35171"float16 __ovld sincos(float16 x, float16 *cosval);\n"
35172"#ifdef cl_khr_fp64\n"
35173"double __ovld sincos(double x, double *cosval);\n"
35174"double2 __ovld sincos(double2 x, double2 *cosval);\n"
35175"double3 __ovld sincos(double3 x, double3 *cosval);\n"
35176"double4 __ovld sincos(double4 x, double4 *cosval);\n"
35177"double8 __ovld sincos(double8 x, double8 *cosval);\n"
35178"double16 __ovld sincos(double16 x, double16 *cosval);\n"
35179"#endif //cl_khr_fp64\n"
35180"#ifdef cl_khr_fp16\n"
35181"half __ovld sincos(half x, half *cosval);\n"
35182"half2 __ovld sincos(half2 x, half2 *cosval);\n"
35183"half3 __ovld sincos(half3 x, half3 *cosval);\n"
35184"half4 __ovld sincos(half4 x, half4 *cosval);\n"
35185"half8 __ovld sincos(half8 x, half8 *cosval);\n"
35186"half16 __ovld sincos(half16 x, half16 *cosval);\n"
35187"#endif //cl_khr_fp16\n"
35188"#else\n"
35189"float __ovld sincos(float x, __global float *cosval);\n"
35190"float2 __ovld sincos(float2 x, __global float2 *cosval);\n"
35191"float3 __ovld sincos(float3 x, __global float3 *cosval);\n"
35192"float4 __ovld sincos(float4 x, __global float4 *cosval);\n"
35193"float8 __ovld sincos(float8 x, __global float8 *cosval);\n"
35194"float16 __ovld sincos(float16 x, __global float16 *cosval);\n"
35195"float __ovld sincos(float x, __local float *cosval);\n"
35196"float2 __ovld sincos(float2 x, __local float2 *cosval);\n"
35197"float3 __ovld sincos(float3 x, __local float3 *cosval);\n"
35198"float4 __ovld sincos(float4 x, __local float4 *cosval);\n"
35199"float8 __ovld sincos(float8 x, __local float8 *cosval);\n"
35200"float16 __ovld sincos(float16 x, __local float16 *cosval);\n"
35201"float __ovld sincos(float x, __private float *cosval);\n"
35202"float2 __ovld sincos(float2 x, __private float2 *cosval);\n"
35203"float3 __ovld sincos(float3 x, __private float3 *cosval);\n"
35204"float4 __ovld sincos(float4 x, __private float4 *cosval);\n"
35205"float8 __ovld sincos(float8 x, __private float8 *cosval);\n"
35206"float16 __ovld sincos(float16 x, __private float16 *cosval);\n"
35207"#ifdef cl_khr_fp64\n"
35208"double __ovld sincos(double x, __global double *cosval);\n"
35209"double2 __ovld sincos(double2 x, __global double2 *cosval);\n"
35210"double3 __ovld sincos(double3 x, __global double3 *cosval);\n"
35211"double4 __ovld sincos(double4 x, __global double4 *cosval);\n"
35212"double8 __ovld sincos(double8 x, __global double8 *cosval);\n"
35213"double16 __ovld sincos(double16 x, __global double16 *cosval);\n"
35214"double __ovld sincos(double x, __local double *cosval);\n"
35215"double2 __ovld sincos(double2 x, __local double2 *cosval);\n"
35216"double3 __ovld sincos(double3 x, __local double3 *cosval);\n"
35217"double4 __ovld sincos(double4 x, __local double4 *cosval);\n"
35218"double8 __ovld sincos(double8 x, __local double8 *cosval);\n"
35219"double16 __ovld sincos(double16 x, __local double16 *cosval);\n"
35220"double __ovld sincos(double x, __private double *cosval);\n"
35221"double2 __ovld sincos(double2 x, __private double2 *cosval);\n"
35222"double3 __ovld sincos(double3 x, __private double3 *cosval);\n"
35223"double4 __ovld sincos(double4 x, __private double4 *cosval);\n"
35224"double8 __ovld sincos(double8 x, __private double8 *cosval);\n"
35225"double16 __ovld sincos(double16 x, __private double16 *cosval);\n"
35226"#endif //cl_khr_fp64\n"
35227"#ifdef cl_khr_fp16\n"
35228"half __ovld sincos(half x, __global half *cosval);\n"
35229"half2 __ovld sincos(half2 x, __global half2 *cosval);\n"
35230"half3 __ovld sincos(half3 x, __global half3 *cosval);\n"
35231"half4 __ovld sincos(half4 x, __global half4 *cosval);\n"
35232"half8 __ovld sincos(half8 x, __global half8 *cosval);\n"
35233"half16 __ovld sincos(half16 x, __global half16 *cosval);\n"
35234"half __ovld sincos(half x, __local half *cosval);\n"
35235"half2 __ovld sincos(half2 x, __local half2 *cosval);\n"
35236"half3 __ovld sincos(half3 x, __local half3 *cosval);\n"
35237"half4 __ovld sincos(half4 x, __local half4 *cosval);\n"
35238"half8 __ovld sincos(half8 x, __local half8 *cosval);\n"
35239"half16 __ovld sincos(half16 x, __local half16 *cosval);\n"
35240"half __ovld sincos(half x, __private half *cosval);\n"
35241"half2 __ovld sincos(half2 x, __private half2 *cosval);\n"
35242"half3 __ovld sincos(half3 x, __private half3 *cosval);\n"
35243"half4 __ovld sincos(half4 x, __private half4 *cosval);\n"
35244"half8 __ovld sincos(half8 x, __private half8 *cosval);\n"
35245"half16 __ovld sincos(half16 x, __private half16 *cosval);\n"
35246"#endif //cl_khr_fp16\n"
35247"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
35248"\n"
35249"/**\n"
35250" * Compute hyperbolic sine.\n"
35251" */\n"
35252"float __ovld __cnfn sinh(float);\n"
35253"float2 __ovld __cnfn sinh(float2);\n"
35254"float3 __ovld __cnfn sinh(float3);\n"
35255"float4 __ovld __cnfn sinh(float4);\n"
35256"float8 __ovld __cnfn sinh(float8);\n"
35257"float16 __ovld __cnfn sinh(float16);\n"
35258"#ifdef cl_khr_fp64\n"
35259"double __ovld __cnfn sinh(double);\n"
35260"double2 __ovld __cnfn sinh(double2);\n"
35261"double3 __ovld __cnfn sinh(double3);\n"
35262"double4 __ovld __cnfn sinh(double4);\n"
35263"double8 __ovld __cnfn sinh(double8);\n"
35264"double16 __ovld __cnfn sinh(double16);\n"
35265"#endif //cl_khr_fp64\n"
35266"#ifdef cl_khr_fp16\n"
35267"half __ovld __cnfn sinh(half);\n"
35268"half2 __ovld __cnfn sinh(half2);\n"
35269"half3 __ovld __cnfn sinh(half3);\n"
35270"half4 __ovld __cnfn sinh(half4);\n"
35271"half8 __ovld __cnfn sinh(half8);\n"
35272"half16 __ovld __cnfn sinh(half16);\n"
35273"#endif //cl_khr_fp16\n"
35274"\n"
35275"/**\n"
35276" * Compute sin (PI * x).\n"
35277" */\n"
35278"float __ovld __cnfn sinpi(float x);\n"
35279"float2 __ovld __cnfn sinpi(float2 x);\n"
35280"float3 __ovld __cnfn sinpi(float3 x);\n"
35281"float4 __ovld __cnfn sinpi(float4 x);\n"
35282"float8 __ovld __cnfn sinpi(float8 x);\n"
35283"float16 __ovld __cnfn sinpi(float16 x);\n"
35284"#ifdef cl_khr_fp64\n"
35285"double __ovld __cnfn sinpi(double x);\n"
35286"double2 __ovld __cnfn sinpi(double2 x);\n"
35287"double3 __ovld __cnfn sinpi(double3 x);\n"
35288"double4 __ovld __cnfn sinpi(double4 x);\n"
35289"double8 __ovld __cnfn sinpi(double8 x);\n"
35290"double16 __ovld __cnfn sinpi(double16 x);\n"
35291"#endif //cl_khr_fp64\n"
35292"#ifdef cl_khr_fp16\n"
35293"half __ovld __cnfn sinpi(half x);\n"
35294"half2 __ovld __cnfn sinpi(half2 x);\n"
35295"half3 __ovld __cnfn sinpi(half3 x);\n"
35296"half4 __ovld __cnfn sinpi(half4 x);\n"
35297"half8 __ovld __cnfn sinpi(half8 x);\n"
35298"half16 __ovld __cnfn sinpi(half16 x);\n"
35299"#endif //cl_khr_fp16\n"
35300"\n"
35301"/**\n"
35302" * Compute square root.\n"
35303" */\n"
35304"float __ovld __cnfn sqrt(float);\n"
35305"float2 __ovld __cnfn sqrt(float2);\n"
35306"float3 __ovld __cnfn sqrt(float3);\n"
35307"float4 __ovld __cnfn sqrt(float4);\n"
35308"float8 __ovld __cnfn sqrt(float8);\n"
35309"float16 __ovld __cnfn sqrt(float16);\n"
35310"#ifdef cl_khr_fp64\n"
35311"double __ovld __cnfn sqrt(double);\n"
35312"double2 __ovld __cnfn sqrt(double2);\n"
35313"double3 __ovld __cnfn sqrt(double3);\n"
35314"double4 __ovld __cnfn sqrt(double4);\n"
35315"double8 __ovld __cnfn sqrt(double8);\n"
35316"double16 __ovld __cnfn sqrt(double16);\n"
35317"#endif //cl_khr_fp64\n"
35318"#ifdef cl_khr_fp16\n"
35319"half __ovld __cnfn sqrt(half);\n"
35320"half2 __ovld __cnfn sqrt(half2);\n"
35321"half3 __ovld __cnfn sqrt(half3);\n"
35322"half4 __ovld __cnfn sqrt(half4);\n"
35323"half8 __ovld __cnfn sqrt(half8);\n"
35324"half16 __ovld __cnfn sqrt(half16);\n"
35325"#endif //cl_khr_fp16\n"
35326"\n"
35327"/**\n"
35328" * Compute tangent.\n"
35329" */\n"
35330"float __ovld __cnfn tan(float);\n"
35331"float2 __ovld __cnfn tan(float2);\n"
35332"float3 __ovld __cnfn tan(float3);\n"
35333"float4 __ovld __cnfn tan(float4);\n"
35334"float8 __ovld __cnfn tan(float8);\n"
35335"float16 __ovld __cnfn tan(float16);\n"
35336"#ifdef cl_khr_fp64\n"
35337"double __ovld __cnfn tan(double);\n"
35338"double2 __ovld __cnfn tan(double2);\n"
35339"double3 __ovld __cnfn tan(double3);\n"
35340"double4 __ovld __cnfn tan(double4);\n"
35341"double8 __ovld __cnfn tan(double8);\n"
35342"double16 __ovld __cnfn tan(double16);\n"
35343"#endif //cl_khr_fp64\n"
35344"#ifdef cl_khr_fp16\n"
35345"half __ovld __cnfn tan(half);\n"
35346"half2 __ovld __cnfn tan(half2);\n"
35347"half3 __ovld __cnfn tan(half3);\n"
35348"half4 __ovld __cnfn tan(half4);\n"
35349"half8 __ovld __cnfn tan(half8);\n"
35350"half16 __ovld __cnfn tan(half16);\n"
35351"#endif //cl_khr_fp16\n"
35352"\n"
35353"/**\n"
35354" * Compute hyperbolic tangent.\n"
35355" */\n"
35356"float __ovld __cnfn tanh(float);\n"
35357"float2 __ovld __cnfn tanh(float2);\n"
35358"float3 __ovld __cnfn tanh(float3);\n"
35359"float4 __ovld __cnfn tanh(float4);\n"
35360"float8 __ovld __cnfn tanh(float8);\n"
35361"float16 __ovld __cnfn tanh(float16);\n"
35362"#ifdef cl_khr_fp64\n"
35363"double __ovld __cnfn tanh(double);\n"
35364"double2 __ovld __cnfn tanh(double2);\n"
35365"double3 __ovld __cnfn tanh(double3);\n"
35366"double4 __ovld __cnfn tanh(double4);\n"
35367"double8 __ovld __cnfn tanh(double8);\n"
35368"double16 __ovld __cnfn tanh(double16);\n"
35369"#endif //cl_khr_fp64\n"
35370"#ifdef cl_khr_fp16\n"
35371"half __ovld __cnfn tanh(half);\n"
35372"half2 __ovld __cnfn tanh(half2);\n"
35373"half3 __ovld __cnfn tanh(half3);\n"
35374"half4 __ovld __cnfn tanh(half4);\n"
35375"half8 __ovld __cnfn tanh(half8);\n"
35376"half16 __ovld __cnfn tanh(half16);\n"
35377"#endif //cl_khr_fp16\n"
35378"\n"
35379"/**\n"
35380" * Compute tan (PI * x).\n"
35381" */\n"
35382"float __ovld __cnfn tanpi(float x);\n"
35383"float2 __ovld __cnfn tanpi(float2 x);\n"
35384"float3 __ovld __cnfn tanpi(float3 x);\n"
35385"float4 __ovld __cnfn tanpi(float4 x);\n"
35386"float8 __ovld __cnfn tanpi(float8 x);\n"
35387"float16 __ovld __cnfn tanpi(float16 x);\n"
35388"#ifdef cl_khr_fp64\n"
35389"double __ovld __cnfn tanpi(double x);\n"
35390"double2 __ovld __cnfn tanpi(double2 x);\n"
35391"double3 __ovld __cnfn tanpi(double3 x);\n"
35392"double4 __ovld __cnfn tanpi(double4 x);\n"
35393"double8 __ovld __cnfn tanpi(double8 x);\n"
35394"double16 __ovld __cnfn tanpi(double16 x);\n"
35395"#endif //cl_khr_fp64\n"
35396"#ifdef cl_khr_fp16\n"
35397"half __ovld __cnfn tanpi(half x);\n"
35398"half2 __ovld __cnfn tanpi(half2 x);\n"
35399"half3 __ovld __cnfn tanpi(half3 x);\n"
35400"half4 __ovld __cnfn tanpi(half4 x);\n"
35401"half8 __ovld __cnfn tanpi(half8 x);\n"
35402"half16 __ovld __cnfn tanpi(half16 x);\n"
35403"#endif //cl_khr_fp16\n"
35404"\n"
35405"/**\n"
35406" * Compute the gamma function.\n"
35407" */\n"
35408"float __ovld __cnfn tgamma(float);\n"
35409"float2 __ovld __cnfn tgamma(float2);\n"
35410"float3 __ovld __cnfn tgamma(float3);\n"
35411"float4 __ovld __cnfn tgamma(float4);\n"
35412"float8 __ovld __cnfn tgamma(float8);\n"
35413"float16 __ovld __cnfn tgamma(float16);\n"
35414"#ifdef cl_khr_fp64\n"
35415"double __ovld __cnfn tgamma(double);\n"
35416"double2 __ovld __cnfn tgamma(double2);\n"
35417"double3 __ovld __cnfn tgamma(double3);\n"
35418"double4 __ovld __cnfn tgamma(double4);\n"
35419"double8 __ovld __cnfn tgamma(double8);\n"
35420"double16 __ovld __cnfn tgamma(double16);\n"
35421"#endif //cl_khr_fp64\n"
35422"#ifdef cl_khr_fp16\n"
35423"half __ovld __cnfn tgamma(half);\n"
35424"half2 __ovld __cnfn tgamma(half2);\n"
35425"half3 __ovld __cnfn tgamma(half3);\n"
35426"half4 __ovld __cnfn tgamma(half4);\n"
35427"half8 __ovld __cnfn tgamma(half8);\n"
35428"half16 __ovld __cnfn tgamma(half16);\n"
35429"#endif //cl_khr_fp16\n"
35430"\n"
35431"/**\n"
35432" * Round to integral value using the round to zero\n"
35433" * rounding mode.\n"
35434" */\n"
35435"float __ovld __cnfn trunc(float);\n"
35436"float2 __ovld __cnfn trunc(float2);\n"
35437"float3 __ovld __cnfn trunc(float3);\n"
35438"float4 __ovld __cnfn trunc(float4);\n"
35439"float8 __ovld __cnfn trunc(float8);\n"
35440"float16 __ovld __cnfn trunc(float16);\n"
35441"#ifdef cl_khr_fp64\n"
35442"double __ovld __cnfn trunc(double);\n"
35443"double2 __ovld __cnfn trunc(double2);\n"
35444"double3 __ovld __cnfn trunc(double3);\n"
35445"double4 __ovld __cnfn trunc(double4);\n"
35446"double8 __ovld __cnfn trunc(double8);\n"
35447"double16 __ovld __cnfn trunc(double16);\n"
35448"#endif //cl_khr_fp64\n"
35449"#ifdef cl_khr_fp16\n"
35450"half __ovld __cnfn trunc(half);\n"
35451"half2 __ovld __cnfn trunc(half2);\n"
35452"half3 __ovld __cnfn trunc(half3);\n"
35453"half4 __ovld __cnfn trunc(half4);\n"
35454"half8 __ovld __cnfn trunc(half8);\n"
35455"half16 __ovld __cnfn trunc(half16);\n"
35456"#endif //cl_khr_fp16\n"
35457"\n"
35458"/**\n"
35459" * Compute cosine. x must be in the range -2^16 ... +2^16.\n"
35460" */\n"
35461"float __ovld __cnfn half_cos(float x);\n"
35462"float2 __ovld __cnfn half_cos(float2 x);\n"
35463"float3 __ovld __cnfn half_cos(float3 x);\n"
35464"float4 __ovld __cnfn half_cos(float4 x);\n"
35465"float8 __ovld __cnfn half_cos(float8 x);\n"
35466"float16 __ovld __cnfn half_cos(float16 x);\n"
35467"\n"
35468"/**\n"
35469" * Compute x / y.\n"
35470" */\n"
35471"float __ovld __cnfn half_divide(float x, float y);\n"
35472"float2 __ovld __cnfn half_divide(float2 x, float2 y);\n"
35473"float3 __ovld __cnfn half_divide(float3 x, float3 y);\n"
35474"float4 __ovld __cnfn half_divide(float4 x, float4 y);\n"
35475"float8 __ovld __cnfn half_divide(float8 x, float8 y);\n"
35476"float16 __ovld __cnfn half_divide(float16 x, float16 y);\n"
35477"\n"
35478"/**\n"
35479" * Compute the base- e exponential of x.\n"
35480" */\n"
35481"float __ovld __cnfn half_exp(float x);\n"
35482"float2 __ovld __cnfn half_exp(float2 x);\n"
35483"float3 __ovld __cnfn half_exp(float3 x);\n"
35484"float4 __ovld __cnfn half_exp(float4 x);\n"
35485"float8 __ovld __cnfn half_exp(float8 x);\n"
35486"float16 __ovld __cnfn half_exp(float16 x);\n"
35487"\n"
35488"/**\n"
35489" * Compute the base- 2 exponential of x.\n"
35490" */\n"
35491"float __ovld __cnfn half_exp2(float x);\n"
35492"float2 __ovld __cnfn half_exp2(float2 x);\n"
35493"float3 __ovld __cnfn half_exp2(float3 x);\n"
35494"float4 __ovld __cnfn half_exp2(float4 x);\n"
35495"float8 __ovld __cnfn half_exp2(float8 x);\n"
35496"float16 __ovld __cnfn half_exp2(float16 x);\n"
35497"\n"
35498"/**\n"
35499" * Compute the base- 10 exponential of x.\n"
35500" */\n"
35501"float __ovld __cnfn half_exp10(float x);\n"
35502"float2 __ovld __cnfn half_exp10(float2 x);\n"
35503"float3 __ovld __cnfn half_exp10(float3 x);\n"
35504"float4 __ovld __cnfn half_exp10(float4 x);\n"
35505"float8 __ovld __cnfn half_exp10(float8 x);\n"
35506"float16 __ovld __cnfn half_exp10(float16 x);\n"
35507"\n"
35508"/**\n"
35509" * Compute natural logarithm.\n"
35510" */\n"
35511"float __ovld __cnfn half_log(float x);\n"
35512"float2 __ovld __cnfn half_log(float2 x);\n"
35513"float3 __ovld __cnfn half_log(float3 x);\n"
35514"float4 __ovld __cnfn half_log(float4 x);\n"
35515"float8 __ovld __cnfn half_log(float8 x);\n"
35516"float16 __ovld __cnfn half_log(float16 x);\n"
35517"\n"
35518"/**\n"
35519" * Compute a base 2 logarithm.\n"
35520" */\n"
35521"float __ovld __cnfn half_log2(float x);\n"
35522"float2 __ovld __cnfn half_log2(float2 x);\n"
35523"float3 __ovld __cnfn half_log2(float3 x);\n"
35524"float4 __ovld __cnfn half_log2(float4 x);\n"
35525"float8 __ovld __cnfn half_log2(float8 x);\n"
35526"float16 __ovld __cnfn half_log2(float16 x);\n"
35527"\n"
35528"/**\n"
35529" * Compute a base 10 logarithm.\n"
35530" */\n"
35531"float __ovld __cnfn half_log10(float x);\n"
35532"float2 __ovld __cnfn half_log10(float2 x);\n"
35533"float3 __ovld __cnfn half_log10(float3 x);\n"
35534"float4 __ovld __cnfn half_log10(float4 x);\n"
35535"float8 __ovld __cnfn half_log10(float8 x);\n"
35536"float16 __ovld __cnfn half_log10(float16 x);\n"
35537"\n"
35538"/**\n"
35539" * Compute x to the power y, where x is >= 0.\n"
35540" */\n"
35541"float __ovld __cnfn half_powr(float x, float y);\n"
35542"float2 __ovld __cnfn half_powr(float2 x, float2 y);\n"
35543"float3 __ovld __cnfn half_powr(float3 x, float3 y);\n"
35544"float4 __ovld __cnfn half_powr(float4 x, float4 y);\n"
35545"float8 __ovld __cnfn half_powr(float8 x, float8 y);\n"
35546"float16 __ovld __cnfn half_powr(float16 x, float16 y);\n"
35547"\n"
35548"/**\n"
35549" * Compute reciprocal.\n"
35550" */\n"
35551"float __ovld __cnfn half_recip(float x);\n"
35552"float2 __ovld __cnfn half_recip(float2 x);\n"
35553"float3 __ovld __cnfn half_recip(float3 x);\n"
35554"float4 __ovld __cnfn half_recip(float4 x);\n"
35555"float8 __ovld __cnfn half_recip(float8 x);\n"
35556"float16 __ovld __cnfn half_recip(float16 x);\n"
35557"\n"
35558"/**\n"
35559" * Compute inverse square root.\n"
35560" */\n"
35561"float __ovld __cnfn half_rsqrt(float x);\n"
35562"float2 __ovld __cnfn half_rsqrt(float2 x);\n"
35563"float3 __ovld __cnfn half_rsqrt(float3 x);\n"
35564"float4 __ovld __cnfn half_rsqrt(float4 x);\n"
35565"float8 __ovld __cnfn half_rsqrt(float8 x);\n"
35566"float16 __ovld __cnfn half_rsqrt(float16 x);\n"
35567"\n"
35568"/**\n"
35569" * Compute sine. x must be in the range -2^16 ... +2^16.\n"
35570" */\n"
35571"float __ovld __cnfn half_sin(float x);\n"
35572"float2 __ovld __cnfn half_sin(float2 x);\n"
35573"float3 __ovld __cnfn half_sin(float3 x);\n"
35574"float4 __ovld __cnfn half_sin(float4 x);\n"
35575"float8 __ovld __cnfn half_sin(float8 x);\n"
35576"float16 __ovld __cnfn half_sin(float16 x);\n"
35577"\n"
35578"/**\n"
35579" * Compute square root.\n"
35580" */\n"
35581"float __ovld __cnfn half_sqrt(float x);\n"
35582"float2 __ovld __cnfn half_sqrt(float2 x);\n"
35583"float3 __ovld __cnfn half_sqrt(float3 x);\n"
35584"float4 __ovld __cnfn half_sqrt(float4 x);\n"
35585"float8 __ovld __cnfn half_sqrt(float8 x);\n"
35586"float16 __ovld __cnfn half_sqrt(float16 x);\n"
35587"\n"
35588"/**\n"
35589" * Compute tangent. x must be in the range -2^16 ... +2^16.\n"
35590" */\n"
35591"float __ovld __cnfn half_tan(float x);\n"
35592"float2 __ovld __cnfn half_tan(float2 x);\n"
35593"float3 __ovld __cnfn half_tan(float3 x);\n"
35594"float4 __ovld __cnfn half_tan(float4 x);\n"
35595"float8 __ovld __cnfn half_tan(float8 x);\n"
35596"float16 __ovld __cnfn half_tan(float16 x);\n"
35597"\n"
35598"/**\n"
35599" * Compute cosine over an implementation-defined range.\n"
35600" * The maximum error is implementation-defined.\n"
35601" */\n"
35602"float __ovld __cnfn native_cos(float x);\n"
35603"float2 __ovld __cnfn native_cos(float2 x);\n"
35604"float3 __ovld __cnfn native_cos(float3 x);\n"
35605"float4 __ovld __cnfn native_cos(float4 x);\n"
35606"float8 __ovld __cnfn native_cos(float8 x);\n"
35607"float16 __ovld __cnfn native_cos(float16 x);\n"
35608"\n"
35609"/**\n"
35610" * Compute x / y over an implementation-defined range.\n"
35611" * The maximum error is implementation-defined.\n"
35612" */\n"
35613"float __ovld __cnfn native_divide(float x, float y);\n"
35614"float2 __ovld __cnfn native_divide(float2 x, float2 y);\n"
35615"float3 __ovld __cnfn native_divide(float3 x, float3 y);\n"
35616"float4 __ovld __cnfn native_divide(float4 x, float4 y);\n"
35617"float8 __ovld __cnfn native_divide(float8 x, float8 y);\n"
35618"float16 __ovld __cnfn native_divide(float16 x, float16 y);\n"
35619"\n"
35620"/**\n"
35621" * Compute the base- e exponential of x over an\n"
35622" * implementation-defined range. The maximum error is\n"
35623" * implementation-defined.\n"
35624" */\n"
35625"float __ovld __cnfn native_exp(float x);\n"
35626"float2 __ovld __cnfn native_exp(float2 x);\n"
35627"float3 __ovld __cnfn native_exp(float3 x);\n"
35628"float4 __ovld __cnfn native_exp(float4 x);\n"
35629"float8 __ovld __cnfn native_exp(float8 x);\n"
35630"float16 __ovld __cnfn native_exp(float16 x);\n"
35631"\n"
35632"/**\n"
35633" * Compute the base- 2 exponential of x over an\n"
35634" * implementation-defined range. The maximum error is\n"
35635" * implementation-defined.\n"
35636" */\n"
35637"float __ovld __cnfn native_exp2(float x);\n"
35638"float2 __ovld __cnfn native_exp2(float2 x);\n"
35639"float3 __ovld __cnfn native_exp2(float3 x);\n"
35640"float4 __ovld __cnfn native_exp2(float4 x);\n"
35641"float8 __ovld __cnfn native_exp2(float8 x);\n"
35642"float16 __ovld __cnfn native_exp2(float16 x);\n"
35643"\n"
35644"/**\n"
35645" * Compute the base- 10 exponential of x over an\n"
35646" * implementation-defined range. The maximum error is\n"
35647" * implementation-defined.\n"
35648" */\n"
35649"float __ovld __cnfn native_exp10(float x);\n"
35650"float2 __ovld __cnfn native_exp10(float2 x);\n"
35651"float3 __ovld __cnfn native_exp10(float3 x);\n"
35652"float4 __ovld __cnfn native_exp10(float4 x);\n"
35653"float8 __ovld __cnfn native_exp10(float8 x);\n"
35654"float16 __ovld __cnfn native_exp10(float16 x);\n"
35655"\n"
35656"/**\n"
35657" * Compute natural logarithm over an implementationdefined\n"
35658" * range. The maximum error is implementation\n"
35659" * defined.\n"
35660" */\n"
35661"float __ovld __cnfn native_log(float x);\n"
35662"float2 __ovld __cnfn native_log(float2 x);\n"
35663"float3 __ovld __cnfn native_log(float3 x);\n"
35664"float4 __ovld __cnfn native_log(float4 x);\n"
35665"float8 __ovld __cnfn native_log(float8 x);\n"
35666"float16 __ovld __cnfn native_log(float16 x);\n"
35667"\n"
35668"/**\n"
35669" * Compute a base 2 logarithm over an implementationdefined\n"
35670" * range. The maximum error is implementationdefined.\n"
35671" */\n"
35672"float __ovld __cnfn native_log2(float x);\n"
35673"float2 __ovld __cnfn native_log2(float2 x);\n"
35674"float3 __ovld __cnfn native_log2(float3 x);\n"
35675"float4 __ovld __cnfn native_log2(float4 x);\n"
35676"float8 __ovld __cnfn native_log2(float8 x);\n"
35677"float16 __ovld __cnfn native_log2(float16 x);\n"
35678"\n"
35679"/**\n"
35680" * Compute a base 10 logarithm over an implementationdefined\n"
35681" * range. The maximum error is implementationdefined.\n"
35682" */\n"
35683"float __ovld __cnfn native_log10(float x);\n"
35684"float2 __ovld __cnfn native_log10(float2 x);\n"
35685"float3 __ovld __cnfn native_log10(float3 x);\n"
35686"float4 __ovld __cnfn native_log10(float4 x);\n"
35687"float8 __ovld __cnfn native_log10(float8 x);\n"
35688"float16 __ovld __cnfn native_log10(float16 x);\n"
35689"\n"
35690"/**\n"
35691" * Compute x to the power y, where x is >= 0. The range of\n"
35692" * x and y are implementation-defined. The maximum error\n"
35693" * is implementation-defined.\n"
35694" */\n"
35695"float __ovld __cnfn native_powr(float x, float y);\n"
35696"float2 __ovld __cnfn native_powr(float2 x, float2 y);\n"
35697"float3 __ovld __cnfn native_powr(float3 x, float3 y);\n"
35698"float4 __ovld __cnfn native_powr(float4 x, float4 y);\n"
35699"float8 __ovld __cnfn native_powr(float8 x, float8 y);\n"
35700"float16 __ovld __cnfn native_powr(float16 x, float16 y);\n"
35701"\n"
35702"/**\n"
35703" * Compute reciprocal over an implementation-defined\n"
35704" * range. The maximum error is implementation-defined.\n"
35705" */\n"
35706"float __ovld __cnfn native_recip(float x);\n"
35707"float2 __ovld __cnfn native_recip(float2 x);\n"
35708"float3 __ovld __cnfn native_recip(float3 x);\n"
35709"float4 __ovld __cnfn native_recip(float4 x);\n"
35710"float8 __ovld __cnfn native_recip(float8 x);\n"
35711"float16 __ovld __cnfn native_recip(float16 x);\n"
35712"\n"
35713"/**\n"
35714" * Compute inverse square root over an implementationdefined\n"
35715" * range. The maximum error is implementationdefined.\n"
35716" */\n"
35717"float __ovld __cnfn native_rsqrt(float x);\n"
35718"float2 __ovld __cnfn native_rsqrt(float2 x);\n"
35719"float3 __ovld __cnfn native_rsqrt(float3 x);\n"
35720"float4 __ovld __cnfn native_rsqrt(float4 x);\n"
35721"float8 __ovld __cnfn native_rsqrt(float8 x);\n"
35722"float16 __ovld __cnfn native_rsqrt(float16 x);\n"
35723"\n"
35724"/**\n"
35725" * Compute sine over an implementation-defined range.\n"
35726" * The maximum error is implementation-defined.\n"
35727" */\n"
35728"float __ovld __cnfn native_sin(float x);\n"
35729"float2 __ovld __cnfn native_sin(float2 x);\n"
35730"float3 __ovld __cnfn native_sin(float3 x);\n"
35731"float4 __ovld __cnfn native_sin(float4 x);\n"
35732"float8 __ovld __cnfn native_sin(float8 x);\n"
35733"float16 __ovld __cnfn native_sin(float16 x);\n"
35734"\n"
35735"/**\n"
35736" * Compute square root over an implementation-defined\n"
35737" * range. The maximum error is implementation-defined.\n"
35738" */\n"
35739"float __ovld __cnfn native_sqrt(float x);\n"
35740"float2 __ovld __cnfn native_sqrt(float2 x);\n"
35741"float3 __ovld __cnfn native_sqrt(float3 x);\n"
35742"float4 __ovld __cnfn native_sqrt(float4 x);\n"
35743"float8 __ovld __cnfn native_sqrt(float8 x);\n"
35744"float16 __ovld __cnfn native_sqrt(float16 x);\n"
35745"\n"
35746"/**\n"
35747" * Compute tangent over an implementation-defined range.\n"
35748" * The maximum error is implementation-defined.\n"
35749" */\n"
35750"float __ovld __cnfn native_tan(float x);\n"
35751"float2 __ovld __cnfn native_tan(float2 x);\n"
35752"float3 __ovld __cnfn native_tan(float3 x);\n"
35753"float4 __ovld __cnfn native_tan(float4 x);\n"
35754"float8 __ovld __cnfn native_tan(float8 x);\n"
35755"float16 __ovld __cnfn native_tan(float16 x);\n"
35756"\n"
35757"// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions\n"
35758"\n"
35759"/**\n"
35760" * Returns | x |.\n"
35761" */\n"
35762"uchar __ovld __cnfn abs(char x);\n"
35763"uchar __ovld __cnfn abs(uchar x);\n"
35764"uchar2 __ovld __cnfn abs(char2 x);\n"
35765"uchar2 __ovld __cnfn abs(uchar2 x);\n"
35766"uchar3 __ovld __cnfn abs(char3 x);\n"
35767"uchar3 __ovld __cnfn abs(uchar3 x);\n"
35768"uchar4 __ovld __cnfn abs(char4 x);\n"
35769"uchar4 __ovld __cnfn abs(uchar4 x);\n"
35770"uchar8 __ovld __cnfn abs(char8 x);\n"
35771"uchar8 __ovld __cnfn abs(uchar8 x);\n"
35772"uchar16 __ovld __cnfn abs(char16 x);\n"
35773"uchar16 __ovld __cnfn abs(uchar16 x);\n"
35774"ushort __ovld __cnfn abs(short x);\n"
35775"ushort __ovld __cnfn abs(ushort x);\n"
35776"ushort2 __ovld __cnfn abs(short2 x);\n"
35777"ushort2 __ovld __cnfn abs(ushort2 x);\n"
35778"ushort3 __ovld __cnfn abs(short3 x);\n"
35779"ushort3 __ovld __cnfn abs(ushort3 x);\n"
35780"ushort4 __ovld __cnfn abs(short4 x);\n"
35781"ushort4 __ovld __cnfn abs(ushort4 x);\n"
35782"ushort8 __ovld __cnfn abs(short8 x);\n"
35783"ushort8 __ovld __cnfn abs(ushort8 x);\n"
35784"ushort16 __ovld __cnfn abs(short16 x);\n"
35785"ushort16 __ovld __cnfn abs(ushort16 x);\n"
35786"uint __ovld __cnfn abs(int x);\n"
35787"uint __ovld __cnfn abs(uint x);\n"
35788"uint2 __ovld __cnfn abs(int2 x);\n"
35789"uint2 __ovld __cnfn abs(uint2 x);\n"
35790"uint3 __ovld __cnfn abs(int3 x);\n"
35791"uint3 __ovld __cnfn abs(uint3 x);\n"
35792"uint4 __ovld __cnfn abs(int4 x);\n"
35793"uint4 __ovld __cnfn abs(uint4 x);\n"
35794"uint8 __ovld __cnfn abs(int8 x);\n"
35795"uint8 __ovld __cnfn abs(uint8 x);\n"
35796"uint16 __ovld __cnfn abs(int16 x);\n"
35797"uint16 __ovld __cnfn abs(uint16 x);\n"
35798"ulong __ovld __cnfn abs(long x);\n"
35799"ulong __ovld __cnfn abs(ulong x);\n"
35800"ulong2 __ovld __cnfn abs(long2 x);\n"
35801"ulong2 __ovld __cnfn abs(ulong2 x);\n"
35802"ulong3 __ovld __cnfn abs(long3 x);\n"
35803"ulong3 __ovld __cnfn abs(ulong3 x);\n"
35804"ulong4 __ovld __cnfn abs(long4 x);\n"
35805"ulong4 __ovld __cnfn abs(ulong4 x);\n"
35806"ulong8 __ovld __cnfn abs(long8 x);\n"
35807"ulong8 __ovld __cnfn abs(ulong8 x);\n"
35808"ulong16 __ovld __cnfn abs(long16 x);\n"
35809"ulong16 __ovld __cnfn abs(ulong16 x);\n"
35810"\n"
35811"/**\n"
35812" * Returns | x - y | without modulo overflow.\n"
35813" */\n"
35814"uchar __ovld __cnfn abs_diff(char x, char y);\n"
35815"uchar __ovld __cnfn abs_diff(uchar x, uchar y);\n"
35816"uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);\n"
35817"uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);\n"
35818"uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);\n"
35819"uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);\n"
35820"uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);\n"
35821"uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);\n"
35822"uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);\n"
35823"uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);\n"
35824"uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);\n"
35825"uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);\n"
35826"ushort __ovld __cnfn abs_diff(short x, short y);\n"
35827"ushort __ovld __cnfn abs_diff(ushort x, ushort y);\n"
35828"ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);\n"
35829"ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);\n"
35830"ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);\n"
35831"ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);\n"
35832"ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);\n"
35833"ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);\n"
35834"ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);\n"
35835"ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);\n"
35836"ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);\n"
35837"ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);\n"
35838"uint __ovld __cnfn abs_diff(int x, int y);\n"
35839"uint __ovld __cnfn abs_diff(uint x, uint y);\n"
35840"uint2 __ovld __cnfn abs_diff(int2 x, int2 y);\n"
35841"uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);\n"
35842"uint3 __ovld __cnfn abs_diff(int3 x, int3 y);\n"
35843"uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);\n"
35844"uint4 __ovld __cnfn abs_diff(int4 x, int4 y);\n"
35845"uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);\n"
35846"uint8 __ovld __cnfn abs_diff(int8 x, int8 y);\n"
35847"uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);\n"
35848"uint16 __ovld __cnfn abs_diff(int16 x, int16 y);\n"
35849"uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);\n"
35850"ulong __ovld __cnfn abs_diff(long x, long y);\n"
35851"ulong __ovld __cnfn abs_diff(ulong x, ulong y);\n"
35852"ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);\n"
35853"ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);\n"
35854"ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);\n"
35855"ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);\n"
35856"ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);\n"
35857"ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);\n"
35858"ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);\n"
35859"ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);\n"
35860"ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);\n"
35861"ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);\n"
35862"\n"
35863"/**\n"
35864" * Returns x + y and saturates the result.\n"
35865" */\n"
35866"char __ovld __cnfn add_sat(char x, char y);\n"
35867"uchar __ovld __cnfn add_sat(uchar x, uchar y);\n"
35868"char2 __ovld __cnfn add_sat(char2 x, char2 y);\n"
35869"uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);\n"
35870"char3 __ovld __cnfn add_sat(char3 x, char3 y);\n"
35871"uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);\n"
35872"char4 __ovld __cnfn add_sat(char4 x, char4 y);\n"
35873"uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);\n"
35874"char8 __ovld __cnfn add_sat(char8 x, char8 y);\n"
35875"uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);\n"
35876"char16 __ovld __cnfn add_sat(char16 x, char16 y);\n"
35877"uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);\n"
35878"short __ovld __cnfn add_sat(short x, short y);\n"
35879"ushort __ovld __cnfn add_sat(ushort x, ushort y);\n"
35880"short2 __ovld __cnfn add_sat(short2 x, short2 y);\n"
35881"ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);\n"
35882"short3 __ovld __cnfn add_sat(short3 x, short3 y);\n"
35883"ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);\n"
35884"short4 __ovld __cnfn add_sat(short4 x, short4 y);\n"
35885"ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);\n"
35886"short8 __ovld __cnfn add_sat(short8 x, short8 y);\n"
35887"ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);\n"
35888"short16 __ovld __cnfn add_sat(short16 x, short16 y);\n"
35889"ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);\n"
35890"int __ovld __cnfn add_sat(int x, int y);\n"
35891"uint __ovld __cnfn add_sat(uint x, uint y);\n"
35892"int2 __ovld __cnfn add_sat(int2 x, int2 y);\n"
35893"uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);\n"
35894"int3 __ovld __cnfn add_sat(int3 x, int3 y);\n"
35895"uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);\n"
35896"int4 __ovld __cnfn add_sat(int4 x, int4 y);\n"
35897"uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);\n"
35898"int8 __ovld __cnfn add_sat(int8 x, int8 y);\n"
35899"uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);\n"
35900"int16 __ovld __cnfn add_sat(int16 x, int16 y);\n"
35901"uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);\n"
35902"long __ovld __cnfn add_sat(long x, long y);\n"
35903"ulong __ovld __cnfn add_sat(ulong x, ulong y);\n"
35904"long2 __ovld __cnfn add_sat(long2 x, long2 y);\n"
35905"ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);\n"
35906"long3 __ovld __cnfn add_sat(long3 x, long3 y);\n"
35907"ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);\n"
35908"long4 __ovld __cnfn add_sat(long4 x, long4 y);\n"
35909"ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);\n"
35910"long8 __ovld __cnfn add_sat(long8 x, long8 y);\n"
35911"ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);\n"
35912"long16 __ovld __cnfn add_sat(long16 x, long16 y);\n"
35913"ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);\n"
35914"\n"
35915"/**\n"
35916" * Returns (x + y) >> 1. The intermediate sum does\n"
35917" * not modulo overflow.\n"
35918" */\n"
35919"char __ovld __cnfn hadd(char x, char y);\n"
35920"uchar __ovld __cnfn hadd(uchar x, uchar y);\n"
35921"char2 __ovld __cnfn hadd(char2 x, char2 y);\n"
35922"uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);\n"
35923"char3 __ovld __cnfn hadd(char3 x, char3 y);\n"
35924"uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);\n"
35925"char4 __ovld __cnfn hadd(char4 x, char4 y);\n"
35926"uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);\n"
35927"char8 __ovld __cnfn hadd(char8 x, char8 y);\n"
35928"uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);\n"
35929"char16 __ovld __cnfn hadd(char16 x, char16 y);\n"
35930"uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);\n"
35931"short __ovld __cnfn hadd(short x, short y);\n"
35932"ushort __ovld __cnfn hadd(ushort x, ushort y);\n"
35933"short2 __ovld __cnfn hadd(short2 x, short2 y);\n"
35934"ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);\n"
35935"short3 __ovld __cnfn hadd(short3 x, short3 y);\n"
35936"ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);\n"
35937"short4 __ovld __cnfn hadd(short4 x, short4 y);\n"
35938"ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);\n"
35939"short8 __ovld __cnfn hadd(short8 x, short8 y);\n"
35940"ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);\n"
35941"short16 __ovld __cnfn hadd(short16 x, short16 y);\n"
35942"ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);\n"
35943"int __ovld __cnfn hadd(int x, int y);\n"
35944"uint __ovld __cnfn hadd(uint x, uint y);\n"
35945"int2 __ovld __cnfn hadd(int2 x, int2 y);\n"
35946"uint2 __ovld __cnfn hadd(uint2 x, uint2 y);\n"
35947"int3 __ovld __cnfn hadd(int3 x, int3 y);\n"
35948"uint3 __ovld __cnfn hadd(uint3 x, uint3 y);\n"
35949"int4 __ovld __cnfn hadd(int4 x, int4 y);\n"
35950"uint4 __ovld __cnfn hadd(uint4 x, uint4 y);\n"
35951"int8 __ovld __cnfn hadd(int8 x, int8 y);\n"
35952"uint8 __ovld __cnfn hadd(uint8 x, uint8 y);\n"
35953"int16 __ovld __cnfn hadd(int16 x, int16 y);\n"
35954"uint16 __ovld __cnfn hadd(uint16 x, uint16 y);\n"
35955"long __ovld __cnfn hadd(long x, long y);\n"
35956"ulong __ovld __cnfn hadd(ulong x, ulong y);\n"
35957"long2 __ovld __cnfn hadd(long2 x, long2 y);\n"
35958"ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);\n"
35959"long3 __ovld __cnfn hadd(long3 x, long3 y);\n"
35960"ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);\n"
35961"long4 __ovld __cnfn hadd(long4 x, long4 y);\n"
35962"ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);\n"
35963"long8 __ovld __cnfn hadd(long8 x, long8 y);\n"
35964"ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);\n"
35965"long16 __ovld __cnfn hadd(long16 x, long16 y);\n"
35966"ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);\n"
35967"\n"
35968"/**\n"
35969" * Returns (x + y + 1) >> 1. The intermediate sum\n"
35970" * does not modulo overflow.\n"
35971" */\n"
35972"char __ovld __cnfn rhadd(char x, char y);\n"
35973"uchar __ovld __cnfn rhadd(uchar x, uchar y);\n"
35974"char2 __ovld __cnfn rhadd(char2 x, char2 y);\n"
35975"uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);\n"
35976"char3 __ovld __cnfn rhadd(char3 x, char3 y);\n"
35977"uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);\n"
35978"char4 __ovld __cnfn rhadd(char4 x, char4 y);\n"
35979"uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);\n"
35980"char8 __ovld __cnfn rhadd(char8 x, char8 y);\n"
35981"uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);\n"
35982"char16 __ovld __cnfn rhadd(char16 x, char16 y);\n"
35983"uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);\n"
35984"short __ovld __cnfn rhadd(short x, short y);\n"
35985"ushort __ovld __cnfn rhadd(ushort x, ushort y);\n"
35986"short2 __ovld __cnfn rhadd(short2 x, short2 y);\n"
35987"ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);\n"
35988"short3 __ovld __cnfn rhadd(short3 x, short3 y);\n"
35989"ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);\n"
35990"short4 __ovld __cnfn rhadd(short4 x, short4 y);\n"
35991"ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);\n"
35992"short8 __ovld __cnfn rhadd(short8 x, short8 y);\n"
35993"ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);\n"
35994"short16 __ovld __cnfn rhadd(short16 x, short16 y);\n"
35995"ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);\n"
35996"int __ovld __cnfn rhadd(int x, int y);\n"
35997"uint __ovld __cnfn rhadd(uint x, uint y);\n"
35998"int2 __ovld __cnfn rhadd(int2 x, int2 y);\n"
35999"uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);\n"
36000"int3 __ovld __cnfn rhadd(int3 x, int3 y);\n"
36001"uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);\n"
36002"int4 __ovld __cnfn rhadd(int4 x, int4 y);\n"
36003"uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);\n"
36004"int8 __ovld __cnfn rhadd(int8 x, int8 y);\n"
36005"uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);\n"
36006"int16 __ovld __cnfn rhadd(int16 x, int16 y);\n"
36007"uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);\n"
36008"long __ovld __cnfn rhadd(long x, long y);\n"
36009"ulong __ovld __cnfn rhadd(ulong x, ulong y);\n"
36010"long2 __ovld __cnfn rhadd(long2 x, long2 y);\n"
36011"ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);\n"
36012"long3 __ovld __cnfn rhadd(long3 x, long3 y);\n"
36013"ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);\n"
36014"long4 __ovld __cnfn rhadd(long4 x, long4 y);\n"
36015"ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);\n"
36016"long8 __ovld __cnfn rhadd(long8 x, long8 y);\n"
36017"ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);\n"
36018"long16 __ovld __cnfn rhadd(long16 x, long16 y);\n"
36019"ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);\n"
36020"\n"
36021"/**\n"
36022" * Returns min(max(x, minval), maxval).\n"
36023" * Results are undefined if minval > maxval.\n"
36024" */\n"
36025"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
36026"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
36027"char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);\n"
36028"uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);\n"
36029"char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);\n"
36030"uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);\n"
36031"char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);\n"
36032"uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);\n"
36033"char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);\n"
36034"uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);\n"
36035"char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);\n"
36036"uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);\n"
36037"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
36038"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
36039"short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);\n"
36040"ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);\n"
36041"short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);\n"
36042"ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);\n"
36043"short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);\n"
36044"ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);\n"
36045"short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);\n"
36046"ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);\n"
36047"short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);\n"
36048"ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);\n"
36049"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
36050"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
36051"int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);\n"
36052"uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);\n"
36053"int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);\n"
36054"uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);\n"
36055"int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);\n"
36056"uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);\n"
36057"int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);\n"
36058"uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);\n"
36059"int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);\n"
36060"uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);\n"
36061"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
36062"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
36063"long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);\n"
36064"ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);\n"
36065"long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);\n"
36066"ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);\n"
36067"long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);\n"
36068"ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);\n"
36069"long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);\n"
36070"ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);\n"
36071"long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);\n"
36072"ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);\n"
36073"char __ovld __cnfn clamp(char x, char minval, char maxval);\n"
36074"uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n"
36075"char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);\n"
36076"uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);\n"
36077"char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);\n"
36078"uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);\n"
36079"char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);\n"
36080"uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);\n"
36081"char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);\n"
36082"uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);\n"
36083"char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);\n"
36084"uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);\n"
36085"short __ovld __cnfn clamp(short x, short minval, short maxval);\n"
36086"ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n"
36087"short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);\n"
36088"ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);\n"
36089"short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);\n"
36090"ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);\n"
36091"short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);\n"
36092"ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);\n"
36093"short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);\n"
36094"ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);\n"
36095"short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);\n"
36096"ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);\n"
36097"int __ovld __cnfn clamp(int x, int minval, int maxval);\n"
36098"uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n"
36099"int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);\n"
36100"uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);\n"
36101"int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);\n"
36102"uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);\n"
36103"int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);\n"
36104"uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);\n"
36105"int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);\n"
36106"uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);\n"
36107"int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);\n"
36108"uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);\n"
36109"long __ovld __cnfn clamp(long x, long minval, long maxval);\n"
36110"ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n"
36111"long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);\n"
36112"ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);\n"
36113"long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);\n"
36114"ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);\n"
36115"long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);\n"
36116"ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);\n"
36117"long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);\n"
36118"ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);\n"
36119"long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);\n"
36120"ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);\n"
36121"\n"
36122"/**\n"
36123" * Returns the number of leading 0-bits in x, starting\n"
36124" * at the most significant bit position.\n"
36125" */\n"
36126"char __ovld __cnfn clz(char x);\n"
36127"uchar __ovld __cnfn clz(uchar x);\n"
36128"char2 __ovld __cnfn clz(char2 x);\n"
36129"uchar2 __ovld __cnfn clz(uchar2 x);\n"
36130"char3 __ovld __cnfn clz(char3 x);\n"
36131"uchar3 __ovld __cnfn clz(uchar3 x);\n"
36132"char4 __ovld __cnfn clz(char4 x);\n"
36133"uchar4 __ovld __cnfn clz(uchar4 x);\n"
36134"char8 __ovld __cnfn clz(char8 x);\n"
36135"uchar8 __ovld __cnfn clz(uchar8 x);\n"
36136"char16 __ovld __cnfn clz(char16 x);\n"
36137"uchar16 __ovld __cnfn clz(uchar16 x);\n"
36138"short __ovld __cnfn clz(short x);\n"
36139"ushort __ovld __cnfn clz(ushort x);\n"
36140"short2 __ovld __cnfn clz(short2 x);\n"
36141"ushort2 __ovld __cnfn clz(ushort2 x);\n"
36142"short3 __ovld __cnfn clz(short3 x);\n"
36143"ushort3 __ovld __cnfn clz(ushort3 x);\n"
36144"short4 __ovld __cnfn clz(short4 x);\n"
36145"ushort4 __ovld __cnfn clz(ushort4 x);\n"
36146"short8 __ovld __cnfn clz(short8 x);\n"
36147"ushort8 __ovld __cnfn clz(ushort8 x);\n"
36148"short16 __ovld __cnfn clz(short16 x);\n"
36149"ushort16 __ovld __cnfn clz(ushort16 x);\n"
36150"int __ovld __cnfn clz(int x);\n"
36151"uint __ovld __cnfn clz(uint x);\n"
36152"int2 __ovld __cnfn clz(int2 x);\n"
36153"uint2 __ovld __cnfn clz(uint2 x);\n"
36154"int3 __ovld __cnfn clz(int3 x);\n"
36155"uint3 __ovld __cnfn clz(uint3 x);\n"
36156"int4 __ovld __cnfn clz(int4 x);\n"
36157"uint4 __ovld __cnfn clz(uint4 x);\n"
36158"int8 __ovld __cnfn clz(int8 x);\n"
36159"uint8 __ovld __cnfn clz(uint8 x);\n"
36160"int16 __ovld __cnfn clz(int16 x);\n"
36161"uint16 __ovld __cnfn clz(uint16 x);\n"
36162"long __ovld __cnfn clz(long x);\n"
36163"ulong __ovld __cnfn clz(ulong x);\n"
36164"long2 __ovld __cnfn clz(long2 x);\n"
36165"ulong2 __ovld __cnfn clz(ulong2 x);\n"
36166"long3 __ovld __cnfn clz(long3 x);\n"
36167"ulong3 __ovld __cnfn clz(ulong3 x);\n"
36168"long4 __ovld __cnfn clz(long4 x);\n"
36169"ulong4 __ovld __cnfn clz(ulong4 x);\n"
36170"long8 __ovld __cnfn clz(long8 x);\n"
36171"ulong8 __ovld __cnfn clz(ulong8 x);\n"
36172"long16 __ovld __cnfn clz(long16 x);\n"
36173"ulong16 __ovld __cnfn clz(ulong16 x);\n"
36174"\n"
36175"/**\n"
36176" * Returns the count of trailing 0-bits in x. If x is 0,\n"
36177" * returns the size in bits of the type of x or\n"
36178" * component type of x, if x is a vector.\n"
36179" */\n"
36180"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36181"char __ovld ctz(char x);\n"
36182"uchar __ovld ctz(uchar x);\n"
36183"char2 __ovld ctz(char2 x);\n"
36184"uchar2 __ovld ctz(uchar2 x);\n"
36185"char3 __ovld ctz(char3 x);\n"
36186"uchar3 __ovld ctz(uchar3 x);\n"
36187"char4 __ovld ctz(char4 x);\n"
36188"uchar4 __ovld ctz(uchar4 x);\n"
36189"char8 __ovld ctz(char8 x);\n"
36190"uchar8 __ovld ctz(uchar8 x);\n"
36191"char16 __ovld ctz(char16 x);\n"
36192"uchar16 __ovld ctz(uchar16 x);\n"
36193"short __ovld ctz(short x);\n"
36194"ushort __ovld ctz(ushort x);\n"
36195"short2 __ovld ctz(short2 x);\n"
36196"ushort2 __ovld ctz(ushort2 x);\n"
36197"short3 __ovld ctz(short3 x);\n"
36198"ushort3 __ovld ctz(ushort3 x);\n"
36199"short4 __ovld ctz(short4 x);\n"
36200"ushort4 __ovld ctz(ushort4 x);\n"
36201"short8 __ovld ctz(short8 x);\n"
36202"ushort8 __ovld ctz(ushort8 x);\n"
36203"short16 __ovld ctz(short16 x);\n"
36204"ushort16 __ovld ctz(ushort16 x);\n"
36205"int __ovld ctz(int x);\n"
36206"uint __ovld ctz(uint x);\n"
36207"int2 __ovld ctz(int2 x);\n"
36208"uint2 __ovld ctz(uint2 x);\n"
36209"int3 __ovld ctz(int3 x);\n"
36210"uint3 __ovld ctz(uint3 x);\n"
36211"int4 __ovld ctz(int4 x);\n"
36212"uint4 __ovld ctz(uint4 x);\n"
36213"int8 __ovld ctz(int8 x);\n"
36214"uint8 __ovld ctz(uint8 x);\n"
36215"int16 __ovld ctz(int16 x);\n"
36216"uint16 __ovld ctz(uint16 x);\n"
36217"long __ovld ctz(long x);\n"
36218"ulong __ovld ctz(ulong x);\n"
36219"long2 __ovld ctz(long2 x);\n"
36220"ulong2 __ovld ctz(ulong2 x);\n"
36221"long3 __ovld ctz(long3 x);\n"
36222"ulong3 __ovld ctz(ulong3 x);\n"
36223"long4 __ovld ctz(long4 x);\n"
36224"ulong4 __ovld ctz(ulong4 x);\n"
36225"long8 __ovld ctz(long8 x);\n"
36226"ulong8 __ovld ctz(ulong8 x);\n"
36227"long16 __ovld ctz(long16 x);\n"
36228"ulong16 __ovld ctz(ulong16 x);\n"
36229"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
36230"\n"
36231"/**\n"
36232" * Returns mul_hi(a, b) + c.\n"
36233" */\n"
36234"char __ovld __cnfn mad_hi(char a, char b, char c);\n"
36235"uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);\n"
36236"char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);\n"
36237"uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);\n"
36238"char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);\n"
36239"uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);\n"
36240"char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);\n"
36241"uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);\n"
36242"char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);\n"
36243"uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);\n"
36244"char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);\n"
36245"uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);\n"
36246"short __ovld __cnfn mad_hi(short a, short b, short c);\n"
36247"ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);\n"
36248"short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);\n"
36249"ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);\n"
36250"short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);\n"
36251"ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);\n"
36252"short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);\n"
36253"ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);\n"
36254"short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);\n"
36255"ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);\n"
36256"short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);\n"
36257"ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);\n"
36258"int __ovld __cnfn mad_hi(int a, int b, int c);\n"
36259"uint __ovld __cnfn mad_hi(uint a, uint b, uint c);\n"
36260"int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);\n"
36261"uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);\n"
36262"int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);\n"
36263"uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);\n"
36264"int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);\n"
36265"uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);\n"
36266"int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);\n"
36267"uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);\n"
36268"int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);\n"
36269"uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);\n"
36270"long __ovld __cnfn mad_hi(long a, long b, long c);\n"
36271"ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);\n"
36272"long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);\n"
36273"ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);\n"
36274"long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);\n"
36275"ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);\n"
36276"long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);\n"
36277"ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);\n"
36278"long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);\n"
36279"ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);\n"
36280"long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);\n"
36281"ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);\n"
36282"\n"
36283"/**\n"
36284" * Returns a * b + c and saturates the result.\n"
36285" */\n"
36286"char __ovld __cnfn mad_sat(char a, char b, char c);\n"
36287"uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);\n"
36288"char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);\n"
36289"uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);\n"
36290"char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);\n"
36291"uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);\n"
36292"char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);\n"
36293"uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);\n"
36294"char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);\n"
36295"uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);\n"
36296"char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);\n"
36297"uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);\n"
36298"short __ovld __cnfn mad_sat(short a, short b, short c);\n"
36299"ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);\n"
36300"short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);\n"
36301"ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);\n"
36302"short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);\n"
36303"ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);\n"
36304"short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);\n"
36305"ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);\n"
36306"short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);\n"
36307"ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);\n"
36308"short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);\n"
36309"ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);\n"
36310"int __ovld __cnfn mad_sat(int a, int b, int c);\n"
36311"uint __ovld __cnfn mad_sat(uint a, uint b, uint c);\n"
36312"int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);\n"
36313"uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);\n"
36314"int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);\n"
36315"uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);\n"
36316"int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);\n"
36317"uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);\n"
36318"int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);\n"
36319"uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);\n"
36320"int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);\n"
36321"uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);\n"
36322"long __ovld __cnfn mad_sat(long a, long b, long c);\n"
36323"ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);\n"
36324"long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);\n"
36325"ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);\n"
36326"long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);\n"
36327"ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);\n"
36328"long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);\n"
36329"ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);\n"
36330"long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);\n"
36331"ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);\n"
36332"long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);\n"
36333"ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);\n"
36334"\n"
36335"/**\n"
36336" * Returns y if x < y, otherwise it returns x.\n"
36337" */\n"
36338"char __ovld __cnfn max(char x, char y);\n"
36339"uchar __ovld __cnfn max(uchar x, uchar y);\n"
36340"char2 __ovld __cnfn max(char2 x, char2 y);\n"
36341"uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);\n"
36342"char3 __ovld __cnfn max(char3 x, char3 y);\n"
36343"uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);\n"
36344"char4 __ovld __cnfn max(char4 x, char4 y);\n"
36345"uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);\n"
36346"char8 __ovld __cnfn max(char8 x, char8 y);\n"
36347"uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);\n"
36348"char16 __ovld __cnfn max(char16 x, char16 y);\n"
36349"uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);\n"
36350"short __ovld __cnfn max(short x, short y);\n"
36351"ushort __ovld __cnfn max(ushort x, ushort y);\n"
36352"short2 __ovld __cnfn max(short2 x, short2 y);\n"
36353"ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);\n"
36354"short3 __ovld __cnfn max(short3 x, short3 y);\n"
36355"ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);\n"
36356"short4 __ovld __cnfn max(short4 x, short4 y);\n"
36357"ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);\n"
36358"short8 __ovld __cnfn max(short8 x, short8 y);\n"
36359"ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);\n"
36360"short16 __ovld __cnfn max(short16 x, short16 y);\n"
36361"ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);\n"
36362"int __ovld __cnfn max(int x, int y);\n"
36363"uint __ovld __cnfn max(uint x, uint y);\n"
36364"int2 __ovld __cnfn max(int2 x, int2 y);\n"
36365"uint2 __ovld __cnfn max(uint2 x, uint2 y);\n"
36366"int3 __ovld __cnfn max(int3 x, int3 y);\n"
36367"uint3 __ovld __cnfn max(uint3 x, uint3 y);\n"
36368"int4 __ovld __cnfn max(int4 x, int4 y);\n"
36369"uint4 __ovld __cnfn max(uint4 x, uint4 y);\n"
36370"int8 __ovld __cnfn max(int8 x, int8 y);\n"
36371"uint8 __ovld __cnfn max(uint8 x, uint8 y);\n"
36372"int16 __ovld __cnfn max(int16 x, int16 y);\n"
36373"uint16 __ovld __cnfn max(uint16 x, uint16 y);\n"
36374"long __ovld __cnfn max(long x, long y);\n"
36375"ulong __ovld __cnfn max(ulong x, ulong y);\n"
36376"long2 __ovld __cnfn max(long2 x, long2 y);\n"
36377"ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);\n"
36378"long3 __ovld __cnfn max(long3 x, long3 y);\n"
36379"ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);\n"
36380"long4 __ovld __cnfn max(long4 x, long4 y);\n"
36381"ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);\n"
36382"long8 __ovld __cnfn max(long8 x, long8 y);\n"
36383"ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);\n"
36384"long16 __ovld __cnfn max(long16 x, long16 y);\n"
36385"ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);\n"
36386"char __ovld __cnfn max(char x, char y);\n"
36387"uchar __ovld __cnfn max(uchar x, uchar y);\n"
36388"char2 __ovld __cnfn max(char2 x, char y);\n"
36389"uchar2 __ovld __cnfn max(uchar2 x, uchar y);\n"
36390"char3 __ovld __cnfn max(char3 x, char y);\n"
36391"uchar3 __ovld __cnfn max(uchar3 x, uchar y);\n"
36392"char4 __ovld __cnfn max(char4 x, char y);\n"
36393"uchar4 __ovld __cnfn max(uchar4 x, uchar y);\n"
36394"char8 __ovld __cnfn max(char8 x, char y);\n"
36395"uchar8 __ovld __cnfn max(uchar8 x, uchar y);\n"
36396"char16 __ovld __cnfn max(char16 x, char y);\n"
36397"uchar16 __ovld __cnfn max(uchar16 x, uchar y);\n"
36398"short __ovld __cnfn max(short x, short y);\n"
36399"ushort __ovld __cnfn max(ushort x, ushort y);\n"
36400"short2 __ovld __cnfn max(short2 x, short y);\n"
36401"ushort2 __ovld __cnfn max(ushort2 x, ushort y);\n"
36402"short3 __ovld __cnfn max(short3 x, short y);\n"
36403"ushort3 __ovld __cnfn max(ushort3 x, ushort y);\n"
36404"short4 __ovld __cnfn max(short4 x, short y);\n"
36405"ushort4 __ovld __cnfn max(ushort4 x, ushort y);\n"
36406"short8 __ovld __cnfn max(short8 x, short y);\n"
36407"ushort8 __ovld __cnfn max(ushort8 x, ushort y);\n"
36408"short16 __ovld __cnfn max(short16 x, short y);\n"
36409"ushort16 __ovld __cnfn max(ushort16 x, ushort y);\n"
36410"int __ovld __cnfn max(int x, int y);\n"
36411"uint __ovld __cnfn max(uint x, uint y);\n"
36412"int2 __ovld __cnfn max(int2 x, int y);\n"
36413"uint2 __ovld __cnfn max(uint2 x, uint y);\n"
36414"int3 __ovld __cnfn max(int3 x, int y);\n"
36415"uint3 __ovld __cnfn max(uint3 x, uint y);\n"
36416"int4 __ovld __cnfn max(int4 x, int y);\n"
36417"uint4 __ovld __cnfn max(uint4 x, uint y);\n"
36418"int8 __ovld __cnfn max(int8 x, int y);\n"
36419"uint8 __ovld __cnfn max(uint8 x, uint y);\n"
36420"int16 __ovld __cnfn max(int16 x, int y);\n"
36421"uint16 __ovld __cnfn max(uint16 x, uint y);\n"
36422"long __ovld __cnfn max(long x, long y);\n"
36423"ulong __ovld __cnfn max(ulong x, ulong y);\n"
36424"long2 __ovld __cnfn max(long2 x, long y);\n"
36425"ulong2 __ovld __cnfn max(ulong2 x, ulong y);\n"
36426"long3 __ovld __cnfn max(long3 x, long y);\n"
36427"ulong3 __ovld __cnfn max(ulong3 x, ulong y);\n"
36428"long4 __ovld __cnfn max(long4 x, long y);\n"
36429"ulong4 __ovld __cnfn max(ulong4 x, ulong y);\n"
36430"long8 __ovld __cnfn max(long8 x, long y);\n"
36431"ulong8 __ovld __cnfn max(ulong8 x, ulong y);\n"
36432"long16 __ovld __cnfn max(long16 x, long y);\n"
36433"ulong16 __ovld __cnfn max(ulong16 x, ulong y);\n"
36434"\n"
36435"/**\n"
36436" * Returns y if y < x, otherwise it returns x.\n"
36437" */\n"
36438"char __ovld __cnfn min(char x, char y);\n"
36439"uchar __ovld __cnfn min(uchar x, uchar y);\n"
36440"char2 __ovld __cnfn min(char2 x, char2 y);\n"
36441"uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);\n"
36442"char3 __ovld __cnfn min(char3 x, char3 y);\n"
36443"uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);\n"
36444"char4 __ovld __cnfn min(char4 x, char4 y);\n"
36445"uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);\n"
36446"char8 __ovld __cnfn min(char8 x, char8 y);\n"
36447"uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);\n"
36448"char16 __ovld __cnfn min(char16 x, char16 y);\n"
36449"uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);\n"
36450"short __ovld __cnfn min(short x, short y);\n"
36451"ushort __ovld __cnfn min(ushort x, ushort y);\n"
36452"short2 __ovld __cnfn min(short2 x, short2 y);\n"
36453"ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);\n"
36454"short3 __ovld __cnfn min(short3 x, short3 y);\n"
36455"ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);\n"
36456"short4 __ovld __cnfn min(short4 x, short4 y);\n"
36457"ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);\n"
36458"short8 __ovld __cnfn min(short8 x, short8 y);\n"
36459"ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);\n"
36460"short16 __ovld __cnfn min(short16 x, short16 y);\n"
36461"ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);\n"
36462"int __ovld __cnfn min(int x, int y);\n"
36463"uint __ovld __cnfn min(uint x, uint y);\n"
36464"int2 __ovld __cnfn min(int2 x, int2 y);\n"
36465"uint2 __ovld __cnfn min(uint2 x, uint2 y);\n"
36466"int3 __ovld __cnfn min(int3 x, int3 y);\n"
36467"uint3 __ovld __cnfn min(uint3 x, uint3 y);\n"
36468"int4 __ovld __cnfn min(int4 x, int4 y);\n"
36469"uint4 __ovld __cnfn min(uint4 x, uint4 y);\n"
36470"int8 __ovld __cnfn min(int8 x, int8 y);\n"
36471"uint8 __ovld __cnfn min(uint8 x, uint8 y);\n"
36472"int16 __ovld __cnfn min(int16 x, int16 y);\n"
36473"uint16 __ovld __cnfn min(uint16 x, uint16 y);\n"
36474"long __ovld __cnfn min(long x, long y);\n"
36475"ulong __ovld __cnfn min(ulong x, ulong y);\n"
36476"long2 __ovld __cnfn min(long2 x, long2 y);\n"
36477"ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);\n"
36478"long3 __ovld __cnfn min(long3 x, long3 y);\n"
36479"ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);\n"
36480"long4 __ovld __cnfn min(long4 x, long4 y);\n"
36481"ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);\n"
36482"long8 __ovld __cnfn min(long8 x, long8 y);\n"
36483"ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);\n"
36484"long16 __ovld __cnfn min(long16 x, long16 y);\n"
36485"ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);\n"
36486"char __ovld __cnfn min(char x, char y);\n"
36487"uchar __ovld __cnfn min(uchar x, uchar y);\n"
36488"char2 __ovld __cnfn min(char2 x, char y);\n"
36489"uchar2 __ovld __cnfn min(uchar2 x, uchar y);\n"
36490"char3 __ovld __cnfn min(char3 x, char y);\n"
36491"uchar3 __ovld __cnfn min(uchar3 x, uchar y);\n"
36492"char4 __ovld __cnfn min(char4 x, char y);\n"
36493"uchar4 __ovld __cnfn min(uchar4 x, uchar y);\n"
36494"char8 __ovld __cnfn min(char8 x, char y);\n"
36495"uchar8 __ovld __cnfn min(uchar8 x, uchar y);\n"
36496"char16 __ovld __cnfn min(char16 x, char y);\n"
36497"uchar16 __ovld __cnfn min(uchar16 x, uchar y);\n"
36498"short __ovld __cnfn min(short x, short y);\n"
36499"ushort __ovld __cnfn min(ushort x, ushort y);\n"
36500"short2 __ovld __cnfn min(short2 x, short y);\n"
36501"ushort2 __ovld __cnfn min(ushort2 x, ushort y);\n"
36502"short3 __ovld __cnfn min(short3 x, short y);\n"
36503"ushort3 __ovld __cnfn min(ushort3 x, ushort y);\n"
36504"short4 __ovld __cnfn min(short4 x, short y);\n"
36505"ushort4 __ovld __cnfn min(ushort4 x, ushort y);\n"
36506"short8 __ovld __cnfn min(short8 x, short y);\n"
36507"ushort8 __ovld __cnfn min(ushort8 x, ushort y);\n"
36508"short16 __ovld __cnfn min(short16 x, short y);\n"
36509"ushort16 __ovld __cnfn min(ushort16 x, ushort y);\n"
36510"int __ovld __cnfn min(int x, int y);\n"
36511"uint __ovld __cnfn min(uint x, uint y);\n"
36512"int2 __ovld __cnfn min(int2 x, int y);\n"
36513"uint2 __ovld __cnfn min(uint2 x, uint y);\n"
36514"int3 __ovld __cnfn min(int3 x, int y);\n"
36515"uint3 __ovld __cnfn min(uint3 x, uint y);\n"
36516"int4 __ovld __cnfn min(int4 x, int y);\n"
36517"uint4 __ovld __cnfn min(uint4 x, uint y);\n"
36518"int8 __ovld __cnfn min(int8 x, int y);\n"
36519"uint8 __ovld __cnfn min(uint8 x, uint y);\n"
36520"int16 __ovld __cnfn min(int16 x, int y);\n"
36521"uint16 __ovld __cnfn min(uint16 x, uint y);\n"
36522"long __ovld __cnfn min(long x, long y);\n"
36523"ulong __ovld __cnfn min(ulong x, ulong y);\n"
36524"long2 __ovld __cnfn min(long2 x, long y);\n"
36525"ulong2 __ovld __cnfn min(ulong2 x, ulong y);\n"
36526"long3 __ovld __cnfn min(long3 x, long y);\n"
36527"ulong3 __ovld __cnfn min(ulong3 x, ulong y);\n"
36528"long4 __ovld __cnfn min(long4 x, long y);\n"
36529"ulong4 __ovld __cnfn min(ulong4 x, ulong y);\n"
36530"long8 __ovld __cnfn min(long8 x, long y);\n"
36531"ulong8 __ovld __cnfn min(ulong8 x, ulong y);\n"
36532"long16 __ovld __cnfn min(long16 x, long y);\n"
36533"ulong16 __ovld __cnfn min(ulong16 x, ulong y);\n"
36534"\n"
36535"/**\n"
36536" * Computes x * y and returns the high half of the\n"
36537" * product of x and y.\n"
36538" */\n"
36539"char __ovld __cnfn mul_hi(char x, char y);\n"
36540"uchar __ovld __cnfn mul_hi(uchar x, uchar y);\n"
36541"char2 __ovld __cnfn mul_hi(char2 x, char2 y);\n"
36542"uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);\n"
36543"char3 __ovld __cnfn mul_hi(char3 x, char3 y);\n"
36544"uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);\n"
36545"char4 __ovld __cnfn mul_hi(char4 x, char4 y);\n"
36546"uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);\n"
36547"char8 __ovld __cnfn mul_hi(char8 x, char8 y);\n"
36548"uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);\n"
36549"char16 __ovld __cnfn mul_hi(char16 x, char16 y);\n"
36550"uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);\n"
36551"short __ovld __cnfn mul_hi(short x, short y);\n"
36552"ushort __ovld __cnfn mul_hi(ushort x, ushort y);\n"
36553"short2 __ovld __cnfn mul_hi(short2 x, short2 y);\n"
36554"ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);\n"
36555"short3 __ovld __cnfn mul_hi(short3 x, short3 y);\n"
36556"ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);\n"
36557"short4 __ovld __cnfn mul_hi(short4 x, short4 y);\n"
36558"ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);\n"
36559"short8 __ovld __cnfn mul_hi(short8 x, short8 y);\n"
36560"ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);\n"
36561"short16 __ovld __cnfn mul_hi(short16 x, short16 y);\n"
36562"ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);\n"
36563"int __ovld __cnfn mul_hi(int x, int y);\n"
36564"uint __ovld __cnfn mul_hi(uint x, uint y);\n"
36565"int2 __ovld __cnfn mul_hi(int2 x, int2 y);\n"
36566"uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);\n"
36567"int3 __ovld __cnfn mul_hi(int3 x, int3 y);\n"
36568"uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);\n"
36569"int4 __ovld __cnfn mul_hi(int4 x, int4 y);\n"
36570"uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);\n"
36571"int8 __ovld __cnfn mul_hi(int8 x, int8 y);\n"
36572"uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);\n"
36573"int16 __ovld __cnfn mul_hi(int16 x, int16 y);\n"
36574"uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);\n"
36575"long __ovld __cnfn mul_hi(long x, long y);\n"
36576"ulong __ovld __cnfn mul_hi(ulong x, ulong y);\n"
36577"long2 __ovld __cnfn mul_hi(long2 x, long2 y);\n"
36578"ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);\n"
36579"long3 __ovld __cnfn mul_hi(long3 x, long3 y);\n"
36580"ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);\n"
36581"long4 __ovld __cnfn mul_hi(long4 x, long4 y);\n"
36582"ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);\n"
36583"long8 __ovld __cnfn mul_hi(long8 x, long8 y);\n"
36584"ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);\n"
36585"long16 __ovld __cnfn mul_hi(long16 x, long16 y);\n"
36586"ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);\n"
36587"\n"
36588"/**\n"
36589" * For each element in v, the bits are shifted left by\n"
36590" * the number of bits given by the corresponding\n"
36591" * element in i (subject to usual shift modulo rules\n"
36592" * described in section 6.3). Bits shifted off the left\n"
36593" * side of the element are shifted back in from the\n"
36594" * right.\n"
36595" */\n"
36596"char __ovld __cnfn rotate(char v, char i);\n"
36597"uchar __ovld __cnfn rotate(uchar v, uchar i);\n"
36598"char2 __ovld __cnfn rotate(char2 v, char2 i);\n"
36599"uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);\n"
36600"char3 __ovld __cnfn rotate(char3 v, char3 i);\n"
36601"uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);\n"
36602"char4 __ovld __cnfn rotate(char4 v, char4 i);\n"
36603"uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);\n"
36604"char8 __ovld __cnfn rotate(char8 v, char8 i);\n"
36605"uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);\n"
36606"char16 __ovld __cnfn rotate(char16 v, char16 i);\n"
36607"uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);\n"
36608"short __ovld __cnfn rotate(short v, short i);\n"
36609"ushort __ovld __cnfn rotate(ushort v, ushort i);\n"
36610"short2 __ovld __cnfn rotate(short2 v, short2 i);\n"
36611"ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);\n"
36612"short3 __ovld __cnfn rotate(short3 v, short3 i);\n"
36613"ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);\n"
36614"short4 __ovld __cnfn rotate(short4 v, short4 i);\n"
36615"ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);\n"
36616"short8 __ovld __cnfn rotate(short8 v, short8 i);\n"
36617"ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);\n"
36618"short16 __ovld __cnfn rotate(short16 v, short16 i);\n"
36619"ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);\n"
36620"int __ovld __cnfn rotate(int v, int i);\n"
36621"uint __ovld __cnfn rotate(uint v, uint i);\n"
36622"int2 __ovld __cnfn rotate(int2 v, int2 i);\n"
36623"uint2 __ovld __cnfn rotate(uint2 v, uint2 i);\n"
36624"int3 __ovld __cnfn rotate(int3 v, int3 i);\n"
36625"uint3 __ovld __cnfn rotate(uint3 v, uint3 i);\n"
36626"int4 __ovld __cnfn rotate(int4 v, int4 i);\n"
36627"uint4 __ovld __cnfn rotate(uint4 v, uint4 i);\n"
36628"int8 __ovld __cnfn rotate(int8 v, int8 i);\n"
36629"uint8 __ovld __cnfn rotate(uint8 v, uint8 i);\n"
36630"int16 __ovld __cnfn rotate(int16 v, int16 i);\n"
36631"uint16 __ovld __cnfn rotate(uint16 v, uint16 i);\n"
36632"long __ovld __cnfn rotate(long v, long i);\n"
36633"ulong __ovld __cnfn rotate(ulong v, ulong i);\n"
36634"long2 __ovld __cnfn rotate(long2 v, long2 i);\n"
36635"ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);\n"
36636"long3 __ovld __cnfn rotate(long3 v, long3 i);\n"
36637"ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);\n"
36638"long4 __ovld __cnfn rotate(long4 v, long4 i);\n"
36639"ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);\n"
36640"long8 __ovld __cnfn rotate(long8 v, long8 i);\n"
36641"ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);\n"
36642"long16 __ovld __cnfn rotate(long16 v, long16 i);\n"
36643"ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);\n"
36644"\n"
36645"/**\n"
36646" * Returns x - y and saturates the result.\n"
36647" */\n"
36648"char __ovld __cnfn sub_sat(char x, char y);\n"
36649"uchar __ovld __cnfn sub_sat(uchar x, uchar y);\n"
36650"char2 __ovld __cnfn sub_sat(char2 x, char2 y);\n"
36651"uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);\n"
36652"char3 __ovld __cnfn sub_sat(char3 x, char3 y);\n"
36653"uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);\n"
36654"char4 __ovld __cnfn sub_sat(char4 x, char4 y);\n"
36655"uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);\n"
36656"char8 __ovld __cnfn sub_sat(char8 x, char8 y);\n"
36657"uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);\n"
36658"char16 __ovld __cnfn sub_sat(char16 x, char16 y);\n"
36659"uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);\n"
36660"short __ovld __cnfn sub_sat(short x, short y);\n"
36661"ushort __ovld __cnfn sub_sat(ushort x, ushort y);\n"
36662"short2 __ovld __cnfn sub_sat(short2 x, short2 y);\n"
36663"ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);\n"
36664"short3 __ovld __cnfn sub_sat(short3 x, short3 y);\n"
36665"ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);\n"
36666"short4 __ovld __cnfn sub_sat(short4 x, short4 y);\n"
36667"ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);\n"
36668"short8 __ovld __cnfn sub_sat(short8 x, short8 y);\n"
36669"ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);\n"
36670"short16 __ovld __cnfn sub_sat(short16 x, short16 y);\n"
36671"ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);\n"
36672"int __ovld __cnfn sub_sat(int x, int y);\n"
36673"uint __ovld __cnfn sub_sat(uint x, uint y);\n"
36674"int2 __ovld __cnfn sub_sat(int2 x, int2 y);\n"
36675"uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);\n"
36676"int3 __ovld __cnfn sub_sat(int3 x, int3 y);\n"
36677"uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);\n"
36678"int4 __ovld __cnfn sub_sat(int4 x, int4 y);\n"
36679"uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);\n"
36680"int8 __ovld __cnfn sub_sat(int8 x, int8 y);\n"
36681"uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);\n"
36682"int16 __ovld __cnfn sub_sat(int16 x, int16 y);\n"
36683"uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);\n"
36684"long __ovld __cnfn sub_sat(long x, long y);\n"
36685"ulong __ovld __cnfn sub_sat(ulong x, ulong y);\n"
36686"long2 __ovld __cnfn sub_sat(long2 x, long2 y);\n"
36687"ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);\n"
36688"long3 __ovld __cnfn sub_sat(long3 x, long3 y);\n"
36689"ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);\n"
36690"long4 __ovld __cnfn sub_sat(long4 x, long4 y);\n"
36691"ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);\n"
36692"long8 __ovld __cnfn sub_sat(long8 x, long8 y);\n"
36693"ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);\n"
36694"long16 __ovld __cnfn sub_sat(long16 x, long16 y);\n"
36695"ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);\n"
36696"\n"
36697"/**\n"
36698" * result[i] = ((short)hi[i] << 8) | lo[i]\n"
36699" * result[i] = ((ushort)hi[i] << 8) | lo[i]\n"
36700" */\n"
36701"short __ovld __cnfn upsample(char hi, uchar lo);\n"
36702"ushort __ovld __cnfn upsample(uchar hi, uchar lo);\n"
36703"short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);\n"
36704"short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);\n"
36705"short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);\n"
36706"short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);\n"
36707"short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);\n"
36708"ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);\n"
36709"ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);\n"
36710"ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);\n"
36711"ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);\n"
36712"ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);\n"
36713"\n"
36714"/**\n"
36715" * result[i] = ((int)hi[i] << 16) | lo[i]\n"
36716" * result[i] = ((uint)hi[i] << 16) | lo[i]\n"
36717" */\n"
36718"int __ovld __cnfn upsample(short hi, ushort lo);\n"
36719"uint __ovld __cnfn upsample(ushort hi, ushort lo);\n"
36720"int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);\n"
36721"int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);\n"
36722"int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);\n"
36723"int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);\n"
36724"int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);\n"
36725"uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);\n"
36726"uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);\n"
36727"uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);\n"
36728"uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);\n"
36729"uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);\n"
36730"/**\n"
36731" * result[i] = ((long)hi[i] << 32) | lo[i]\n"
36732" * result[i] = ((ulong)hi[i] << 32) | lo[i]\n"
36733" */\n"
36734"long __ovld __cnfn upsample(int hi, uint lo);\n"
36735"ulong __ovld __cnfn upsample(uint hi, uint lo);\n"
36736"long2 __ovld __cnfn upsample(int2 hi, uint2 lo);\n"
36737"long3 __ovld __cnfn upsample(int3 hi, uint3 lo);\n"
36738"long4 __ovld __cnfn upsample(int4 hi, uint4 lo);\n"
36739"long8 __ovld __cnfn upsample(int8 hi, uint8 lo);\n"
36740"long16 __ovld __cnfn upsample(int16 hi, uint16 lo);\n"
36741"ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);\n"
36742"ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);\n"
36743"ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);\n"
36744"ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);\n"
36745"ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);\n"
36746"\n"
36747"/*\n"
36748" * popcount(x): returns the number of set bits in x\n"
36749" */\n"
36750"char __ovld __cnfn popcount(char x);\n"
36751"uchar __ovld __cnfn popcount(uchar x);\n"
36752"char2 __ovld __cnfn popcount(char2 x);\n"
36753"uchar2 __ovld __cnfn popcount(uchar2 x);\n"
36754"char3 __ovld __cnfn popcount(char3 x);\n"
36755"uchar3 __ovld __cnfn popcount(uchar3 x);\n"
36756"char4 __ovld __cnfn popcount(char4 x);\n"
36757"uchar4 __ovld __cnfn popcount(uchar4 x);\n"
36758"char8 __ovld __cnfn popcount(char8 x);\n"
36759"uchar8 __ovld __cnfn popcount(uchar8 x);\n"
36760"char16 __ovld __cnfn popcount(char16 x);\n"
36761"uchar16 __ovld __cnfn popcount(uchar16 x);\n"
36762"short __ovld __cnfn popcount(short x);\n"
36763"ushort __ovld __cnfn popcount(ushort x);\n"
36764"short2 __ovld __cnfn popcount(short2 x);\n"
36765"ushort2 __ovld __cnfn popcount(ushort2 x);\n"
36766"short3 __ovld __cnfn popcount(short3 x);\n"
36767"ushort3 __ovld __cnfn popcount(ushort3 x);\n"
36768"short4 __ovld __cnfn popcount(short4 x);\n"
36769"ushort4 __ovld __cnfn popcount(ushort4 x);\n"
36770"short8 __ovld __cnfn popcount(short8 x);\n"
36771"ushort8 __ovld __cnfn popcount(ushort8 x);\n"
36772"short16 __ovld __cnfn popcount(short16 x);\n"
36773"ushort16 __ovld __cnfn popcount(ushort16 x);\n"
36774"int __ovld __cnfn popcount(int x);\n"
36775"uint __ovld __cnfn popcount(uint x);\n"
36776"int2 __ovld __cnfn popcount(int2 x);\n"
36777"uint2 __ovld __cnfn popcount(uint2 x);\n"
36778"int3 __ovld __cnfn popcount(int3 x);\n"
36779"uint3 __ovld __cnfn popcount(uint3 x);\n"
36780"int4 __ovld __cnfn popcount(int4 x);\n"
36781"uint4 __ovld __cnfn popcount(uint4 x);\n"
36782"int8 __ovld __cnfn popcount(int8 x);\n"
36783"uint8 __ovld __cnfn popcount(uint8 x);\n"
36784"int16 __ovld __cnfn popcount(int16 x);\n"
36785"uint16 __ovld __cnfn popcount(uint16 x);\n"
36786"long __ovld __cnfn popcount(long x);\n"
36787"ulong __ovld __cnfn popcount(ulong x);\n"
36788"long2 __ovld __cnfn popcount(long2 x);\n"
36789"ulong2 __ovld __cnfn popcount(ulong2 x);\n"
36790"long3 __ovld __cnfn popcount(long3 x);\n"
36791"ulong3 __ovld __cnfn popcount(ulong3 x);\n"
36792"long4 __ovld __cnfn popcount(long4 x);\n"
36793"ulong4 __ovld __cnfn popcount(ulong4 x);\n"
36794"long8 __ovld __cnfn popcount(long8 x);\n"
36795"ulong8 __ovld __cnfn popcount(ulong8 x);\n"
36796"long16 __ovld __cnfn popcount(long16 x);\n"
36797"ulong16 __ovld __cnfn popcount(ulong16 x);\n"
36798"\n"
36799"/**\n"
36800" * Multiply two 24-bit integer values x and y and add\n"
36801" * the 32-bit integer result to the 32-bit integer z.\n"
36802" * Refer to the definition of mul24 to see how the 24-bit\n"
36803" * integer multiplication is performed.\n"
36804" */\n"
36805"int __ovld __cnfn mad24(int x, int y, int z);\n"
36806"uint __ovld __cnfn mad24(uint x, uint y, uint z);\n"
36807"int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);\n"
36808"uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);\n"
36809"int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);\n"
36810"uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);\n"
36811"int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);\n"
36812"uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);\n"
36813"int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);\n"
36814"uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);\n"
36815"int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);\n"
36816"uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);\n"
36817"\n"
36818"/**\n"
36819" * Multiply two 24-bit integer values x and y. x and y\n"
36820" * are 32-bit integers but only the low 24-bits are used\n"
36821" * to perform the multiplication. mul24 should only\n"
36822" * be used when values in x and y are in the range\n"
36823" * [-2^23, 2^23-1] if x and y are signed integers and in the\n"
36824" * range [0, 2^24-1] if x and y are unsigned integers. If\n"
36825" * x and y are not in this range, the multiplication\n"
36826" * result is implementation-defined.\n"
36827" */\n"
36828"int __ovld __cnfn mul24(int x, int y);\n"
36829"uint __ovld __cnfn mul24(uint x, uint y);\n"
36830"int2 __ovld __cnfn mul24(int2 x, int2 y);\n"
36831"uint2 __ovld __cnfn mul24(uint2 x, uint2 y);\n"
36832"int3 __ovld __cnfn mul24(int3 x, int3 y);\n"
36833"uint3 __ovld __cnfn mul24(uint3 x, uint3 y);\n"
36834"int4 __ovld __cnfn mul24(int4 x, int4 y);\n"
36835"uint4 __ovld __cnfn mul24(uint4 x, uint4 y);\n"
36836"int8 __ovld __cnfn mul24(int8 x, int8 y);\n"
36837"uint8 __ovld __cnfn mul24(uint8 x, uint8 y);\n"
36838"int16 __ovld __cnfn mul24(int16 x, int16 y);\n"
36839"uint16 __ovld __cnfn mul24(uint16 x, uint16 y);\n"
36840"\n"
36841"// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions\n"
36842"\n"
36843"/**\n"
36844" * Returns fmin(fmax(x, minval), maxval).\n"
36845" * Results are undefined if minval > maxval.\n"
36846" */\n"
36847"float __ovld __cnfn clamp(float x, float minval, float maxval);\n"
36848"float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);\n"
36849"float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);\n"
36850"float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);\n"
36851"float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);\n"
36852"float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);\n"
36853"float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);\n"
36854"float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);\n"
36855"float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);\n"
36856"float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);\n"
36857"float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);\n"
36858"#ifdef cl_khr_fp64\n"
36859"double __ovld __cnfn clamp(double x, double minval, double maxval);\n"
36860"double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);\n"
36861"double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);\n"
36862"double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);\n"
36863"double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);\n"
36864"double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);\n"
36865"double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);\n"
36866"double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);\n"
36867"double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);\n"
36868"double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);\n"
36869"double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);\n"
36870"#endif //cl_khr_fp64\n"
36871"#ifdef cl_khr_fp16\n"
36872"half __ovld __cnfn clamp(half x, half minval, half maxval);\n"
36873"half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);\n"
36874"half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);\n"
36875"half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);\n"
36876"half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);\n"
36877"half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);\n"
36878"half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);\n"
36879"half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);\n"
36880"half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);\n"
36881"half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);\n"
36882"half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);\n"
36883"#endif //cl_khr_fp16\n"
36884"\n"
36885"/**\n"
36886" * Converts radians to degrees, i.e. (180 / PI) *\n"
36887" * radians.\n"
36888" */\n"
36889"float __ovld __cnfn degrees(float radians);\n"
36890"float2 __ovld __cnfn degrees(float2 radians);\n"
36891"float3 __ovld __cnfn degrees(float3 radians);\n"
36892"float4 __ovld __cnfn degrees(float4 radians);\n"
36893"float8 __ovld __cnfn degrees(float8 radians);\n"
36894"float16 __ovld __cnfn degrees(float16 radians);\n"
36895"#ifdef cl_khr_fp64\n"
36896"double __ovld __cnfn degrees(double radians);\n"
36897"double2 __ovld __cnfn degrees(double2 radians);\n"
36898"double3 __ovld __cnfn degrees(double3 radians);\n"
36899"double4 __ovld __cnfn degrees(double4 radians);\n"
36900"double8 __ovld __cnfn degrees(double8 radians);\n"
36901"double16 __ovld __cnfn degrees(double16 radians);\n"
36902"#endif //cl_khr_fp64\n"
36903"#ifdef cl_khr_fp16\n"
36904"half __ovld __cnfn degrees(half radians);\n"
36905"half2 __ovld __cnfn degrees(half2 radians);\n"
36906"half3 __ovld __cnfn degrees(half3 radians);\n"
36907"half4 __ovld __cnfn degrees(half4 radians);\n"
36908"half8 __ovld __cnfn degrees(half8 radians);\n"
36909"half16 __ovld __cnfn degrees(half16 radians);\n"
36910"#endif //cl_khr_fp16\n"
36911"\n"
36912"/**\n"
36913" * Returns y if x < y, otherwise it returns x. If x and y\n"
36914" * are infinite or NaN, the return values are undefined.\n"
36915" */\n"
36916"float __ovld __cnfn max(float x, float y);\n"
36917"float2 __ovld __cnfn max(float2 x, float2 y);\n"
36918"float3 __ovld __cnfn max(float3 x, float3 y);\n"
36919"float4 __ovld __cnfn max(float4 x, float4 y);\n"
36920"float8 __ovld __cnfn max(float8 x, float8 y);\n"
36921"float16 __ovld __cnfn max(float16 x, float16 y);\n"
36922"float2 __ovld __cnfn max(float2 x, float y);\n"
36923"float3 __ovld __cnfn max(float3 x, float y);\n"
36924"float4 __ovld __cnfn max(float4 x, float y);\n"
36925"float8 __ovld __cnfn max(float8 x, float y);\n"
36926"float16 __ovld __cnfn max(float16 x, float y);\n"
36927"#ifdef cl_khr_fp64\n"
36928"double __ovld __cnfn max(double x, double y);\n"
36929"double2 __ovld __cnfn max(double2 x, double2 y);\n"
36930"double3 __ovld __cnfn max(double3 x, double3 y);\n"
36931"double4 __ovld __cnfn max(double4 x, double4 y);\n"
36932"double8 __ovld __cnfn max(double8 x, double8 y);\n"
36933"double16 __ovld __cnfn max(double16 x, double16 y);\n"
36934"double2 __ovld __cnfn max(double2 x, double y);\n"
36935"double3 __ovld __cnfn max(double3 x, double y);\n"
36936"double4 __ovld __cnfn max(double4 x, double y);\n"
36937"double8 __ovld __cnfn max(double8 x, double y);\n"
36938"double16 __ovld __cnfn max(double16 x, double y);\n"
36939"#endif //cl_khr_fp64\n"
36940"#ifdef cl_khr_fp16\n"
36941"half __ovld __cnfn max(half x, half y);\n"
36942"half2 __ovld __cnfn max(half2 x, half2 y);\n"
36943"half3 __ovld __cnfn max(half3 x, half3 y);\n"
36944"half4 __ovld __cnfn max(half4 x, half4 y);\n"
36945"half8 __ovld __cnfn max(half8 x, half8 y);\n"
36946"half16 __ovld __cnfn max(half16 x, half16 y);\n"
36947"half2 __ovld __cnfn max(half2 x, half y);\n"
36948"half3 __ovld __cnfn max(half3 x, half y);\n"
36949"half4 __ovld __cnfn max(half4 x, half y);\n"
36950"half8 __ovld __cnfn max(half8 x, half y);\n"
36951"half16 __ovld __cnfn max(half16 x, half y);\n"
36952"#endif //cl_khr_fp16\n"
36953"\n"
36954"/**\n"
36955" * Returns y if y < x, otherwise it returns x. If x and y\n"
36956" * are infinite or NaN, the return values are undefined.\n"
36957" */\n"
36958"float __ovld __cnfn min(float x, float y);\n"
36959"float2 __ovld __cnfn min(float2 x, float2 y);\n"
36960"float3 __ovld __cnfn min(float3 x, float3 y);\n"
36961"float4 __ovld __cnfn min(float4 x, float4 y);\n"
36962"float8 __ovld __cnfn min(float8 x, float8 y);\n"
36963"float16 __ovld __cnfn min(float16 x, float16 y);\n"
36964"float2 __ovld __cnfn min(float2 x, float y);\n"
36965"float3 __ovld __cnfn min(float3 x, float y);\n"
36966"float4 __ovld __cnfn min(float4 x, float y);\n"
36967"float8 __ovld __cnfn min(float8 x, float y);\n"
36968"float16 __ovld __cnfn min(float16 x, float y);\n"
36969"#ifdef cl_khr_fp64\n"
36970"double __ovld __cnfn min(double x, double y);\n"
36971"double2 __ovld __cnfn min(double2 x, double2 y);\n"
36972"double3 __ovld __cnfn min(double3 x, double3 y);\n"
36973"double4 __ovld __cnfn min(double4 x, double4 y);\n"
36974"double8 __ovld __cnfn min(double8 x, double8 y);\n"
36975"double16 __ovld __cnfn min(double16 x, double16 y);\n"
36976"double2 __ovld __cnfn min(double2 x, double y);\n"
36977"double3 __ovld __cnfn min(double3 x, double y);\n"
36978"double4 __ovld __cnfn min(double4 x, double y);\n"
36979"double8 __ovld __cnfn min(double8 x, double y);\n"
36980"double16 __ovld __cnfn min(double16 x, double y);\n"
36981"#endif //cl_khr_fp64\n"
36982"#ifdef cl_khr_fp16\n"
36983"half __ovld __cnfn min(half x, half y);\n"
36984"half2 __ovld __cnfn min(half2 x, half2 y);\n"
36985"half3 __ovld __cnfn min(half3 x, half3 y);\n"
36986"half4 __ovld __cnfn min(half4 x, half4 y);\n"
36987"half8 __ovld __cnfn min(half8 x, half8 y);\n"
36988"half16 __ovld __cnfn min(half16 x, half16 y);\n"
36989"half2 __ovld __cnfn min(half2 x, half y);\n"
36990"half3 __ovld __cnfn min(half3 x, half y);\n"
36991"half4 __ovld __cnfn min(half4 x, half y);\n"
36992"half8 __ovld __cnfn min(half8 x, half y);\n"
36993"half16 __ovld __cnfn min(half16 x, half y);\n"
36994"#endif //cl_khr_fp16\n"
36995"\n"
36996"/**\n"
36997" * Returns the linear blend of x & y implemented as:\n"
36998" * x + (y - x) * a\n"
36999" * a must be a value in the range 0.0 ... 1.0. If a is not\n"
37000" * in the range 0.0 ... 1.0, the return values are\n"
37001" * undefined.\n"
37002" */\n"
37003"float __ovld __cnfn mix(float x, float y, float a);\n"
37004"float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);\n"
37005"float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);\n"
37006"float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);\n"
37007"float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);\n"
37008"float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);\n"
37009"float2 __ovld __cnfn mix(float2 x, float2 y, float a);\n"
37010"float3 __ovld __cnfn mix(float3 x, float3 y, float a);\n"
37011"float4 __ovld __cnfn mix(float4 x, float4 y, float a);\n"
37012"float8 __ovld __cnfn mix(float8 x, float8 y, float a);\n"
37013"float16 __ovld __cnfn mix(float16 x, float16 y, float a);\n"
37014"#ifdef cl_khr_fp64\n"
37015"double __ovld __cnfn mix(double x, double y, double a);\n"
37016"double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);\n"
37017"double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);\n"
37018"double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);\n"
37019"double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);\n"
37020"double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);\n"
37021"double2 __ovld __cnfn mix(double2 x, double2 y, double a);\n"
37022"double3 __ovld __cnfn mix(double3 x, double3 y, double a);\n"
37023"double4 __ovld __cnfn mix(double4 x, double4 y, double a);\n"
37024"double8 __ovld __cnfn mix(double8 x, double8 y, double a);\n"
37025"double16 __ovld __cnfn mix(double16 x, double16 y, double a);\n"
37026"#endif //cl_khr_fp64\n"
37027"#ifdef cl_khr_fp16\n"
37028"half __ovld __cnfn mix(half x, half y, half a);\n"
37029"half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);\n"
37030"half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);\n"
37031"half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);\n"
37032"half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);\n"
37033"half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);\n"
37034"half2 __ovld __cnfn mix(half2 x, half2 y, half a);\n"
37035"half3 __ovld __cnfn mix(half3 x, half3 y, half a);\n"
37036"half4 __ovld __cnfn mix(half4 x, half4 y, half a);\n"
37037"half8 __ovld __cnfn mix(half8 x, half8 y, half a);\n"
37038"half16 __ovld __cnfn mix(half16 x, half16 y, half a);\n"
37039"#endif //cl_khr_fp16\n"
37040"\n"
37041"/**\n"
37042" * Converts degrees to radians, i.e. (PI / 180) *\n"
37043" * degrees.\n"
37044" */\n"
37045"float __ovld __cnfn radians(float degrees);\n"
37046"float2 __ovld __cnfn radians(float2 degrees);\n"
37047"float3 __ovld __cnfn radians(float3 degrees);\n"
37048"float4 __ovld __cnfn radians(float4 degrees);\n"
37049"float8 __ovld __cnfn radians(float8 degrees);\n"
37050"float16 __ovld __cnfn radians(float16 degrees);\n"
37051"#ifdef cl_khr_fp64\n"
37052"double __ovld __cnfn radians(double degrees);\n"
37053"double2 __ovld __cnfn radians(double2 degrees);\n"
37054"double3 __ovld __cnfn radians(double3 degrees);\n"
37055"double4 __ovld __cnfn radians(double4 degrees);\n"
37056"double8 __ovld __cnfn radians(double8 degrees);\n"
37057"double16 __ovld __cnfn radians(double16 degrees);\n"
37058"#endif //cl_khr_fp64\n"
37059"#ifdef cl_khr_fp16\n"
37060"half __ovld __cnfn radians(half degrees);\n"
37061"half2 __ovld __cnfn radians(half2 degrees);\n"
37062"half3 __ovld __cnfn radians(half3 degrees);\n"
37063"half4 __ovld __cnfn radians(half4 degrees);\n"
37064"half8 __ovld __cnfn radians(half8 degrees);\n"
37065"half16 __ovld __cnfn radians(half16 degrees);\n"
37066"#endif //cl_khr_fp16\n"
37067"\n"
37068"/**\n"
37069" * Returns 0.0 if x < edge, otherwise it returns 1.0.\n"
37070" */\n"
37071"float __ovld __cnfn step(float edge, float x);\n"
37072"float2 __ovld __cnfn step(float2 edge, float2 x);\n"
37073"float3 __ovld __cnfn step(float3 edge, float3 x);\n"
37074"float4 __ovld __cnfn step(float4 edge, float4 x);\n"
37075"float8 __ovld __cnfn step(float8 edge, float8 x);\n"
37076"float16 __ovld __cnfn step(float16 edge, float16 x);\n"
37077"float2 __ovld __cnfn step(float edge, float2 x);\n"
37078"float3 __ovld __cnfn step(float edge, float3 x);\n"
37079"float4 __ovld __cnfn step(float edge, float4 x);\n"
37080"float8 __ovld __cnfn step(float edge, float8 x);\n"
37081"float16 __ovld __cnfn step(float edge, float16 x);\n"
37082"#ifdef cl_khr_fp64\n"
37083"double __ovld __cnfn step(double edge, double x);\n"
37084"double2 __ovld __cnfn step(double2 edge, double2 x);\n"
37085"double3 __ovld __cnfn step(double3 edge, double3 x);\n"
37086"double4 __ovld __cnfn step(double4 edge, double4 x);\n"
37087"double8 __ovld __cnfn step(double8 edge, double8 x);\n"
37088"double16 __ovld __cnfn step(double16 edge, double16 x);\n"
37089"double2 __ovld __cnfn step(double edge, double2 x);\n"
37090"double3 __ovld __cnfn step(double edge, double3 x);\n"
37091"double4 __ovld __cnfn step(double edge, double4 x);\n"
37092"double8 __ovld __cnfn step(double edge, double8 x);\n"
37093"double16 __ovld __cnfn step(double edge, double16 x);\n"
37094"#endif //cl_khr_fp64\n"
37095"#ifdef cl_khr_fp16\n"
37096"half __ovld __cnfn step(half edge, half x);\n"
37097"half2 __ovld __cnfn step(half2 edge, half2 x);\n"
37098"half3 __ovld __cnfn step(half3 edge, half3 x);\n"
37099"half4 __ovld __cnfn step(half4 edge, half4 x);\n"
37100"half8 __ovld __cnfn step(half8 edge, half8 x);\n"
37101"half16 __ovld __cnfn step(half16 edge, half16 x);\n"
37102"half __ovld __cnfn step(half edge, half x);\n"
37103"half2 __ovld __cnfn step(half edge, half2 x);\n"
37104"half3 __ovld __cnfn step(half edge, half3 x);\n"
37105"half4 __ovld __cnfn step(half edge, half4 x);\n"
37106"half8 __ovld __cnfn step(half edge, half8 x);\n"
37107"half16 __ovld __cnfn step(half edge, half16 x);\n"
37108"#endif //cl_khr_fp16\n"
37109"\n"
37110"/**\n"
37111" * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and\n"
37112" * performs smooth Hermite interpolation between 0\n"
37113" * and 1when edge0 < x < edge1. This is useful in\n"
37114" * cases where you would want a threshold function\n"
37115" * with a smooth transition.\n"
37116" * This is equivalent to:\n"
37117" * gentype t;\n"
37118" * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);\n"
37119" * return t * t * (3 - 2 * t);\n"
37120" * Results are undefined if edge0 >= edge1 or if x,\n"
37121" * edge0 or edge1 is a NaN.\n"
37122" */\n"
37123"float __ovld __cnfn smoothstep(float edge0, float edge1, float x);\n"
37124"float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);\n"
37125"float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);\n"
37126"float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);\n"
37127"float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);\n"
37128"float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);\n"
37129"float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);\n"
37130"float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);\n"
37131"float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);\n"
37132"float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);\n"
37133"float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);\n"
37134"#ifdef cl_khr_fp64\n"
37135"double __ovld __cnfn smoothstep(double edge0, double edge1, double x);\n"
37136"double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);\n"
37137"double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);\n"
37138"double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);\n"
37139"double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);\n"
37140"double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);\n"
37141"double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);\n"
37142"double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);\n"
37143"double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);\n"
37144"double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);\n"
37145"double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);\n"
37146"#endif //cl_khr_fp64\n"
37147"#ifdef cl_khr_fp16\n"
37148"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
37149"half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);\n"
37150"half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);\n"
37151"half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);\n"
37152"half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);\n"
37153"half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);\n"
37154"half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n"
37155"half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);\n"
37156"half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);\n"
37157"half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);\n"
37158"half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);\n"
37159"half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);\n"
37160"#endif //cl_khr_fp16\n"
37161"\n"
37162"/**\n"
37163" * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =\n"
37164" * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.\n"
37165" */\n"
37166"float __ovld __cnfn sign(float x);\n"
37167"float2 __ovld __cnfn sign(float2 x);\n"
37168"float3 __ovld __cnfn sign(float3 x);\n"
37169"float4 __ovld __cnfn sign(float4 x);\n"
37170"float8 __ovld __cnfn sign(float8 x);\n"
37171"float16 __ovld __cnfn sign(float16 x);\n"
37172"#ifdef cl_khr_fp64\n"
37173"double __ovld __cnfn sign(double x);\n"
37174"double2 __ovld __cnfn sign(double2 x);\n"
37175"double3 __ovld __cnfn sign(double3 x);\n"
37176"double4 __ovld __cnfn sign(double4 x);\n"
37177"double8 __ovld __cnfn sign(double8 x);\n"
37178"double16 __ovld __cnfn sign(double16 x);\n"
37179"#endif //cl_khr_fp64\n"
37180"#ifdef cl_khr_fp16\n"
37181"half __ovld __cnfn sign(half x);\n"
37182"half2 __ovld __cnfn sign(half2 x);\n"
37183"half3 __ovld __cnfn sign(half3 x);\n"
37184"half4 __ovld __cnfn sign(half4 x);\n"
37185"half8 __ovld __cnfn sign(half8 x);\n"
37186"half16 __ovld __cnfn sign(half16 x);\n"
37187"#endif //cl_khr_fp16\n"
37188"\n"
37189"// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions\n"
37190"\n"
37191"/**\n"
37192" * Returns the cross product of p0.xyz and p1.xyz. The\n"
37193" * w component of float4 result returned will be 0.0.\n"
37194" */\n"
37195"float4 __ovld __cnfn cross(float4 p0, float4 p1);\n"
37196"float3 __ovld __cnfn cross(float3 p0, float3 p1);\n"
37197"#ifdef cl_khr_fp64\n"
37198"double4 __ovld __cnfn cross(double4 p0, double4 p1);\n"
37199"double3 __ovld __cnfn cross(double3 p0, double3 p1);\n"
37200"#endif //cl_khr_fp64\n"
37201"#ifdef cl_khr_fp16\n"
37202"half4 __ovld __cnfn cross(half4 p0, half4 p1);\n"
37203"half3 __ovld __cnfn cross(half3 p0, half3 p1);\n"
37204"#endif //cl_khr_fp16\n"
37205"\n"
37206"/**\n"
37207" * Compute dot product.\n"
37208" */\n"
37209"float __ovld __cnfn dot(float p0, float p1);\n"
37210"float __ovld __cnfn dot(float2 p0, float2 p1);\n"
37211"float __ovld __cnfn dot(float3 p0, float3 p1);\n"
37212"float __ovld __cnfn dot(float4 p0, float4 p1);\n"
37213"#ifdef cl_khr_fp64\n"
37214"double __ovld __cnfn dot(double p0, double p1);\n"
37215"double __ovld __cnfn dot(double2 p0, double2 p1);\n"
37216"double __ovld __cnfn dot(double3 p0, double3 p1);\n"
37217"double __ovld __cnfn dot(double4 p0, double4 p1);\n"
37218"#endif //cl_khr_fp64\n"
37219"#ifdef cl_khr_fp16\n"
37220"half __ovld __cnfn dot(half p0, half p1);\n"
37221"half __ovld __cnfn dot(half2 p0, half2 p1);\n"
37222"half __ovld __cnfn dot(half3 p0, half3 p1);\n"
37223"half __ovld __cnfn dot(half4 p0, half4 p1);\n"
37224"#endif //cl_khr_fp16\n"
37225"\n"
37226"/**\n"
37227" * Returns the distance between p0 and p1. This is\n"
37228" * calculated as length(p0 - p1).\n"
37229" */\n"
37230"float __ovld __cnfn distance(float p0, float p1);\n"
37231"float __ovld __cnfn distance(float2 p0, float2 p1);\n"
37232"float __ovld __cnfn distance(float3 p0, float3 p1);\n"
37233"float __ovld __cnfn distance(float4 p0, float4 p1);\n"
37234"#ifdef cl_khr_fp64\n"
37235"double __ovld __cnfn distance(double p0, double p1);\n"
37236"double __ovld __cnfn distance(double2 p0, double2 p1);\n"
37237"double __ovld __cnfn distance(double3 p0, double3 p1);\n"
37238"double __ovld __cnfn distance(double4 p0, double4 p1);\n"
37239"#endif //cl_khr_fp64\n"
37240"#ifdef cl_khr_fp16\n"
37241"half __ovld __cnfn distance(half p0, half p1);\n"
37242"half __ovld __cnfn distance(half2 p0, half2 p1);\n"
37243"half __ovld __cnfn distance(half3 p0, half3 p1);\n"
37244"half __ovld __cnfn distance(half4 p0, half4 p1);\n"
37245"#endif //cl_khr_fp16\n"
37246"\n"
37247"/**\n"
37248" * Return the length of vector p, i.e.,\n"
37249" * sqrt(p.x2 + p.y 2 + ...)\n"
37250" */\n"
37251"float __ovld __cnfn length(float p);\n"
37252"float __ovld __cnfn length(float2 p);\n"
37253"float __ovld __cnfn length(float3 p);\n"
37254"float __ovld __cnfn length(float4 p);\n"
37255"#ifdef cl_khr_fp64\n"
37256"double __ovld __cnfn length(double p);\n"
37257"double __ovld __cnfn length(double2 p);\n"
37258"double __ovld __cnfn length(double3 p);\n"
37259"double __ovld __cnfn length(double4 p);\n"
37260"#endif //cl_khr_fp64\n"
37261"#ifdef cl_khr_fp16\n"
37262"half __ovld __cnfn length(half p);\n"
37263"half __ovld __cnfn length(half2 p);\n"
37264"half __ovld __cnfn length(half3 p);\n"
37265"half __ovld __cnfn length(half4 p);\n"
37266"#endif //cl_khr_fp16\n"
37267"\n"
37268"/**\n"
37269" * Returns a vector in the same direction as p but with a\n"
37270" * length of 1.\n"
37271" */\n"
37272"float __ovld __cnfn normalize(float p);\n"
37273"float2 __ovld __cnfn normalize(float2 p);\n"
37274"float3 __ovld __cnfn normalize(float3 p);\n"
37275"float4 __ovld __cnfn normalize(float4 p);\n"
37276"#ifdef cl_khr_fp64\n"
37277"double __ovld __cnfn normalize(double p);\n"
37278"double2 __ovld __cnfn normalize(double2 p);\n"
37279"double3 __ovld __cnfn normalize(double3 p);\n"
37280"double4 __ovld __cnfn normalize(double4 p);\n"
37281"#endif //cl_khr_fp64\n"
37282"#ifdef cl_khr_fp16\n"
37283"half __ovld __cnfn normalize(half p);\n"
37284"half2 __ovld __cnfn normalize(half2 p);\n"
37285"half3 __ovld __cnfn normalize(half3 p);\n"
37286"half4 __ovld __cnfn normalize(half4 p);\n"
37287"#endif //cl_khr_fp16\n"
37288"\n"
37289"/**\n"
37290" * Returns fast_length(p0 - p1).\n"
37291" */\n"
37292"float __ovld __cnfn fast_distance(float p0, float p1);\n"
37293"float __ovld __cnfn fast_distance(float2 p0, float2 p1);\n"
37294"float __ovld __cnfn fast_distance(float3 p0, float3 p1);\n"
37295"float __ovld __cnfn fast_distance(float4 p0, float4 p1);\n"
37296"#ifdef cl_khr_fp16\n"
37297"half __ovld __cnfn fast_distance(half p0, half p1);\n"
37298"half __ovld __cnfn fast_distance(half2 p0, half2 p1);\n"
37299"half __ovld __cnfn fast_distance(half3 p0, half3 p1);\n"
37300"half __ovld __cnfn fast_distance(half4 p0, half4 p1);\n"
37301"#endif //cl_khr_fp16\n"
37302"\n"
37303"/**\n"
37304" * Returns the length of vector p computed as:\n"
37305" * half_sqrt(p.x2 + p.y2 + ...)\n"
37306" */\n"
37307"float __ovld __cnfn fast_length(float p);\n"
37308"float __ovld __cnfn fast_length(float2 p);\n"
37309"float __ovld __cnfn fast_length(float3 p);\n"
37310"float __ovld __cnfn fast_length(float4 p);\n"
37311"#ifdef cl_khr_fp16\n"
37312"half __ovld __cnfn fast_length(half p);\n"
37313"half __ovld __cnfn fast_length(half2 p);\n"
37314"half __ovld __cnfn fast_length(half3 p);\n"
37315"half __ovld __cnfn fast_length(half4 p);\n"
37316"#endif //cl_khr_fp16\n"
37317"\n"
37318"/**\n"
37319" * Returns a vector in the same direction as p but with a\n"
37320" * length of 1. fast_normalize is computed as:\n"
37321" * p * half_rsqrt (p.x^2 + p.y^2 + ... )\n"
37322" * The result shall be within 8192 ulps error from the\n"
37323" * infinitely precise result of\n"
37324" * if (all(p == 0.0f))\n"
37325" * result = p;\n"
37326" * else\n"
37327" * result = p / sqrt (p.x^2 + p.y^2 + ...);\n"
37328" * with the following exceptions:\n"
37329" * 1) If the sum of squares is greater than FLT_MAX\n"
37330" * then the value of the floating-point values in the\n"
37331" * result vector are undefined.\n"
37332" * 2) If the sum of squares is less than FLT_MIN then\n"
37333" * the implementation may return back p.\n"
37334" * 3) If the device is in \"denorms are flushed to zero\"\n"
37335" * mode, individual operand elements with magnitude\n"
37336" * less than sqrt(FLT_MIN) may be flushed to zero\n"
37337" * before proceeding with the calculation.\n"
37338" */\n"
37339"float __ovld __cnfn fast_normalize(float p);\n"
37340"float2 __ovld __cnfn fast_normalize(float2 p);\n"
37341"float3 __ovld __cnfn fast_normalize(float3 p);\n"
37342"float4 __ovld __cnfn fast_normalize(float4 p);\n"
37343"#ifdef cl_khr_fp16\n"
37344"half __ovld __cnfn fast_normalize(half p);\n"
37345"half2 __ovld __cnfn fast_normalize(half2 p);\n"
37346"half3 __ovld __cnfn fast_normalize(half3 p);\n"
37347"half4 __ovld __cnfn fast_normalize(half4 p);\n"
37348"#endif //cl_khr_fp16\n"
37349"\n"
37350"// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions\n"
37351"\n"
37352"/**\n"
37353" * intn isequal (floatn x, floatn y)\n"
37354" * Returns the component-wise compare of x == y.\n"
37355" */\n"
37356"int __ovld __cnfn isequal(float x, float y);\n"
37357"int2 __ovld __cnfn isequal(float2 x, float2 y);\n"
37358"int3 __ovld __cnfn isequal(float3 x, float3 y);\n"
37359"int4 __ovld __cnfn isequal(float4 x, float4 y);\n"
37360"int8 __ovld __cnfn isequal(float8 x, float8 y);\n"
37361"int16 __ovld __cnfn isequal(float16 x, float16 y);\n"
37362"#ifdef cl_khr_fp64\n"
37363"int __ovld __cnfn isequal(double x, double y);\n"
37364"long2 __ovld __cnfn isequal(double2 x, double2 y);\n"
37365"long3 __ovld __cnfn isequal(double3 x, double3 y);\n"
37366"long4 __ovld __cnfn isequal(double4 x, double4 y);\n"
37367"long8 __ovld __cnfn isequal(double8 x, double8 y);\n"
37368"long16 __ovld __cnfn isequal(double16 x, double16 y);\n"
37369"#endif //cl_khr_fp64\n"
37370"#ifdef cl_khr_fp16\n"
37371"int __ovld __cnfn isequal(half x, half y);\n"
37372"short2 __ovld __cnfn isequal(half2 x, half2 y);\n"
37373"short3 __ovld __cnfn isequal(half3 x, half3 y);\n"
37374"short4 __ovld __cnfn isequal(half4 x, half4 y);\n"
37375"short8 __ovld __cnfn isequal(half8 x, half8 y);\n"
37376"short16 __ovld __cnfn isequal(half16 x, half16 y);\n"
37377"#endif //cl_khr_fp16\n"
37378"\n"
37379"/**\n"
37380" * Returns the component-wise compare of x != y.\n"
37381" */\n"
37382"int __ovld __cnfn isnotequal(float x, float y);\n"
37383"int2 __ovld __cnfn isnotequal(float2 x, float2 y);\n"
37384"int3 __ovld __cnfn isnotequal(float3 x, float3 y);\n"
37385"int4 __ovld __cnfn isnotequal(float4 x, float4 y);\n"
37386"int8 __ovld __cnfn isnotequal(float8 x, float8 y);\n"
37387"int16 __ovld __cnfn isnotequal(float16 x, float16 y);\n"
37388"#ifdef cl_khr_fp64\n"
37389"int __ovld __cnfn isnotequal(double x, double y);\n"
37390"long2 __ovld __cnfn isnotequal(double2 x, double2 y);\n"
37391"long3 __ovld __cnfn isnotequal(double3 x, double3 y);\n"
37392"long4 __ovld __cnfn isnotequal(double4 x, double4 y);\n"
37393"long8 __ovld __cnfn isnotequal(double8 x, double8 y);\n"
37394"long16 __ovld __cnfn isnotequal(double16 x, double16 y);\n"
37395"#endif //cl_khr_fp64\n"
37396"#ifdef cl_khr_fp16\n"
37397"int __ovld __cnfn isnotequal(half x, half y);\n"
37398"short2 __ovld __cnfn isnotequal(half2 x, half2 y);\n"
37399"short3 __ovld __cnfn isnotequal(half3 x, half3 y);\n"
37400"short4 __ovld __cnfn isnotequal(half4 x, half4 y);\n"
37401"short8 __ovld __cnfn isnotequal(half8 x, half8 y);\n"
37402"short16 __ovld __cnfn isnotequal(half16 x, half16 y);\n"
37403"#endif //cl_khr_fp16\n"
37404"\n"
37405"/**\n"
37406" * Returns the component-wise compare of x > y.\n"
37407" */\n"
37408"int __ovld __cnfn isgreater(float x, float y);\n"
37409"int2 __ovld __cnfn isgreater(float2 x, float2 y);\n"
37410"int3 __ovld __cnfn isgreater(float3 x, float3 y);\n"
37411"int4 __ovld __cnfn isgreater(float4 x, float4 y);\n"
37412"int8 __ovld __cnfn isgreater(float8 x, float8 y);\n"
37413"int16 __ovld __cnfn isgreater(float16 x, float16 y);\n"
37414"#ifdef cl_khr_fp64\n"
37415"int __ovld __cnfn isgreater(double x, double y);\n"
37416"long2 __ovld __cnfn isgreater(double2 x, double2 y);\n"
37417"long3 __ovld __cnfn isgreater(double3 x, double3 y);\n"
37418"long4 __ovld __cnfn isgreater(double4 x, double4 y);\n"
37419"long8 __ovld __cnfn isgreater(double8 x, double8 y);\n"
37420"long16 __ovld __cnfn isgreater(double16 x, double16 y);\n"
37421"#endif //cl_khr_fp64\n"
37422"#ifdef cl_khr_fp16\n"
37423"int __ovld __cnfn isgreater(half x, half y);\n"
37424"short2 __ovld __cnfn isgreater(half2 x, half2 y);\n"
37425"short3 __ovld __cnfn isgreater(half3 x, half3 y);\n"
37426"short4 __ovld __cnfn isgreater(half4 x, half4 y);\n"
37427"short8 __ovld __cnfn isgreater(half8 x, half8 y);\n"
37428"short16 __ovld __cnfn isgreater(half16 x, half16 y);\n"
37429"#endif //cl_khr_fp16\n"
37430"\n"
37431"/**\n"
37432" * Returns the component-wise compare of x >= y.\n"
37433" */\n"
37434"int __ovld __cnfn isgreaterequal(float x, float y);\n"
37435"int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);\n"
37436"int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);\n"
37437"int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);\n"
37438"int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);\n"
37439"int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);\n"
37440"#ifdef cl_khr_fp64\n"
37441"int __ovld __cnfn isgreaterequal(double x, double y);\n"
37442"long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);\n"
37443"long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);\n"
37444"long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);\n"
37445"long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);\n"
37446"long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);\n"
37447"#endif //cl_khr_fp64\n"
37448"#ifdef cl_khr_fp16\n"
37449"int __ovld __cnfn isgreaterequal(half x, half y);\n"
37450"short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);\n"
37451"short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);\n"
37452"short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);\n"
37453"short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);\n"
37454"short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);\n"
37455"#endif //cl_khr_fp16\n"
37456"\n"
37457"/**\n"
37458" * Returns the component-wise compare of x < y.\n"
37459" */\n"
37460"int __ovld __cnfn isless(float x, float y);\n"
37461"int2 __ovld __cnfn isless(float2 x, float2 y);\n"
37462"int3 __ovld __cnfn isless(float3 x, float3 y);\n"
37463"int4 __ovld __cnfn isless(float4 x, float4 y);\n"
37464"int8 __ovld __cnfn isless(float8 x, float8 y);\n"
37465"int16 __ovld __cnfn isless(float16 x, float16 y);\n"
37466"#ifdef cl_khr_fp64\n"
37467"int __ovld __cnfn isless(double x, double y);\n"
37468"long2 __ovld __cnfn isless(double2 x, double2 y);\n"
37469"long3 __ovld __cnfn isless(double3 x, double3 y);\n"
37470"long4 __ovld __cnfn isless(double4 x, double4 y);\n"
37471"long8 __ovld __cnfn isless(double8 x, double8 y);\n"
37472"long16 __ovld __cnfn isless(double16 x, double16 y);\n"
37473"#endif //cl_khr_fp64\n"
37474"#ifdef cl_khr_fp16\n"
37475"int __ovld __cnfn isless(half x, half y);\n"
37476"short2 __ovld __cnfn isless(half2 x, half2 y);\n"
37477"short3 __ovld __cnfn isless(half3 x, half3 y);\n"
37478"short4 __ovld __cnfn isless(half4 x, half4 y);\n"
37479"short8 __ovld __cnfn isless(half8 x, half8 y);\n"
37480"short16 __ovld __cnfn isless(half16 x, half16 y);\n"
37481"#endif //cl_khr_fp16\n"
37482"\n"
37483"/**\n"
37484" * Returns the component-wise compare of x <= y.\n"
37485" */\n"
37486"int __ovld __cnfn islessequal(float x, float y);\n"
37487"int2 __ovld __cnfn islessequal(float2 x, float2 y);\n"
37488"int3 __ovld __cnfn islessequal(float3 x, float3 y);\n"
37489"int4 __ovld __cnfn islessequal(float4 x, float4 y);\n"
37490"int8 __ovld __cnfn islessequal(float8 x, float8 y);\n"
37491"int16 __ovld __cnfn islessequal(float16 x, float16 y);\n"
37492"#ifdef cl_khr_fp64\n"
37493"int __ovld __cnfn islessequal(double x, double y);\n"
37494"long2 __ovld __cnfn islessequal(double2 x, double2 y);\n"
37495"long3 __ovld __cnfn islessequal(double3 x, double3 y);\n"
37496"long4 __ovld __cnfn islessequal(double4 x, double4 y);\n"
37497"long8 __ovld __cnfn islessequal(double8 x, double8 y);\n"
37498"long16 __ovld __cnfn islessequal(double16 x, double16 y);\n"
37499"#endif //cl_khr_fp64\n"
37500"#ifdef cl_khr_fp16\n"
37501"int __ovld __cnfn islessequal(half x, half y);\n"
37502"short2 __ovld __cnfn islessequal(half2 x, half2 y);\n"
37503"short3 __ovld __cnfn islessequal(half3 x, half3 y);\n"
37504"short4 __ovld __cnfn islessequal(half4 x, half4 y);\n"
37505"short8 __ovld __cnfn islessequal(half8 x, half8 y);\n"
37506"short16 __ovld __cnfn islessequal(half16 x, half16 y);\n"
37507"#endif //cl_khr_fp16\n"
37508"\n"
37509"/**\n"
37510" * Returns the component-wise compare of\n"
37511" * (x < y) || (x > y) .\n"
37512" */\n"
37513"int __ovld __cnfn islessgreater(float x, float y);\n"
37514"int2 __ovld __cnfn islessgreater(float2 x, float2 y);\n"
37515"int3 __ovld __cnfn islessgreater(float3 x, float3 y);\n"
37516"int4 __ovld __cnfn islessgreater(float4 x, float4 y);\n"
37517"int8 __ovld __cnfn islessgreater(float8 x, float8 y);\n"
37518"int16 __ovld __cnfn islessgreater(float16 x, float16 y);\n"
37519"#ifdef cl_khr_fp64\n"
37520"int __ovld __cnfn islessgreater(double x, double y);\n"
37521"long2 __ovld __cnfn islessgreater(double2 x, double2 y);\n"
37522"long3 __ovld __cnfn islessgreater(double3 x, double3 y);\n"
37523"long4 __ovld __cnfn islessgreater(double4 x, double4 y);\n"
37524"long8 __ovld __cnfn islessgreater(double8 x, double8 y);\n"
37525"long16 __ovld __cnfn islessgreater(double16 x, double16 y);\n"
37526"#endif //cl_khr_fp64\n"
37527"#ifdef cl_khr_fp16\n"
37528"int __ovld __cnfn islessgreater(half x, half y);\n"
37529"short2 __ovld __cnfn islessgreater(half2 x, half2 y);\n"
37530"short3 __ovld __cnfn islessgreater(half3 x, half3 y);\n"
37531"short4 __ovld __cnfn islessgreater(half4 x, half4 y);\n"
37532"short8 __ovld __cnfn islessgreater(half8 x, half8 y);\n"
37533"short16 __ovld __cnfn islessgreater(half16 x, half16 y);\n"
37534"#endif //cl_khr_fp16\n"
37535"\n"
37536"/**\n"
37537" * Test for finite value.\n"
37538" */\n"
37539"int __ovld __cnfn isfinite(float);\n"
37540"int2 __ovld __cnfn isfinite(float2);\n"
37541"int3 __ovld __cnfn isfinite(float3);\n"
37542"int4 __ovld __cnfn isfinite(float4);\n"
37543"int8 __ovld __cnfn isfinite(float8);\n"
37544"int16 __ovld __cnfn isfinite(float16);\n"
37545"#ifdef cl_khr_fp64\n"
37546"int __ovld __cnfn isfinite(double);\n"
37547"long2 __ovld __cnfn isfinite(double2);\n"
37548"long3 __ovld __cnfn isfinite(double3);\n"
37549"long4 __ovld __cnfn isfinite(double4);\n"
37550"long8 __ovld __cnfn isfinite(double8);\n"
37551"long16 __ovld __cnfn isfinite(double16);\n"
37552"#endif //cl_khr_fp64\n"
37553"#ifdef cl_khr_fp16\n"
37554"int __ovld __cnfn isfinite(half);\n"
37555"short2 __ovld __cnfn isfinite(half2);\n"
37556"short3 __ovld __cnfn isfinite(half3);\n"
37557"short4 __ovld __cnfn isfinite(half4);\n"
37558"short8 __ovld __cnfn isfinite(half8);\n"
37559"short16 __ovld __cnfn isfinite(half16);\n"
37560"#endif //cl_khr_fp16\n"
37561"\n"
37562"/**\n"
37563" * Test for infinity value (+ve or -ve) .\n"
37564" */\n"
37565"int __ovld __cnfn isinf(float);\n"
37566"int2 __ovld __cnfn isinf(float2);\n"
37567"int3 __ovld __cnfn isinf(float3);\n"
37568"int4 __ovld __cnfn isinf(float4);\n"
37569"int8 __ovld __cnfn isinf(float8);\n"
37570"int16 __ovld __cnfn isinf(float16);\n"
37571"#ifdef cl_khr_fp64\n"
37572"int __ovld __cnfn isinf(double);\n"
37573"long2 __ovld __cnfn isinf(double2);\n"
37574"long3 __ovld __cnfn isinf(double3);\n"
37575"long4 __ovld __cnfn isinf(double4);\n"
37576"long8 __ovld __cnfn isinf(double8);\n"
37577"long16 __ovld __cnfn isinf(double16);\n"
37578"#endif //cl_khr_fp64\n"
37579"#ifdef cl_khr_fp16\n"
37580"int __ovld __cnfn isinf(half);\n"
37581"short2 __ovld __cnfn isinf(half2);\n"
37582"short3 __ovld __cnfn isinf(half3);\n"
37583"short4 __ovld __cnfn isinf(half4);\n"
37584"short8 __ovld __cnfn isinf(half8);\n"
37585"short16 __ovld __cnfn isinf(half16);\n"
37586"#endif //cl_khr_fp16\n"
37587"\n"
37588"/**\n"
37589" * Test for a NaN.\n"
37590" */\n"
37591"int __ovld __cnfn isnan(float);\n"
37592"int2 __ovld __cnfn isnan(float2);\n"
37593"int3 __ovld __cnfn isnan(float3);\n"
37594"int4 __ovld __cnfn isnan(float4);\n"
37595"int8 __ovld __cnfn isnan(float8);\n"
37596"int16 __ovld __cnfn isnan(float16);\n"
37597"#ifdef cl_khr_fp64\n"
37598"int __ovld __cnfn isnan(double);\n"
37599"long2 __ovld __cnfn isnan(double2);\n"
37600"long3 __ovld __cnfn isnan(double3);\n"
37601"long4 __ovld __cnfn isnan(double4);\n"
37602"long8 __ovld __cnfn isnan(double8);\n"
37603"long16 __ovld __cnfn isnan(double16);\n"
37604"#endif //cl_khr_fp64\n"
37605"#ifdef cl_khr_fp16\n"
37606"int __ovld __cnfn isnan(half);\n"
37607"short2 __ovld __cnfn isnan(half2);\n"
37608"short3 __ovld __cnfn isnan(half3);\n"
37609"short4 __ovld __cnfn isnan(half4);\n"
37610"short8 __ovld __cnfn isnan(half8);\n"
37611"short16 __ovld __cnfn isnan(half16);\n"
37612"#endif //cl_khr_fp16\n"
37613"\n"
37614"/**\n"
37615" * Test for a normal value.\n"
37616" */\n"
37617"int __ovld __cnfn isnormal(float);\n"
37618"int2 __ovld __cnfn isnormal(float2);\n"
37619"int3 __ovld __cnfn isnormal(float3);\n"
37620"int4 __ovld __cnfn isnormal(float4);\n"
37621"int8 __ovld __cnfn isnormal(float8);\n"
37622"int16 __ovld __cnfn isnormal(float16);\n"
37623"#ifdef cl_khr_fp64\n"
37624"int __ovld __cnfn isnormal(double);\n"
37625"long2 __ovld __cnfn isnormal(double2);\n"
37626"long3 __ovld __cnfn isnormal(double3);\n"
37627"long4 __ovld __cnfn isnormal(double4);\n"
37628"long8 __ovld __cnfn isnormal(double8);\n"
37629"long16 __ovld __cnfn isnormal(double16);\n"
37630"#endif //cl_khr_fp64\n"
37631"#ifdef cl_khr_fp16\n"
37632"int __ovld __cnfn isnormal(half);\n"
37633"short2 __ovld __cnfn isnormal(half2);\n"
37634"short3 __ovld __cnfn isnormal(half3);\n"
37635"short4 __ovld __cnfn isnormal(half4);\n"
37636"short8 __ovld __cnfn isnormal(half8);\n"
37637"short16 __ovld __cnfn isnormal(half16);\n"
37638"#endif //cl_khr_fp16\n"
37639"\n"
37640"/**\n"
37641" * Test if arguments are ordered. isordered() takes\n"
37642" * arguments x and y, and returns the result\n"
37643" * isequal(x, x) && isequal(y, y).\n"
37644" */\n"
37645"int __ovld __cnfn isordered(float x, float y);\n"
37646"int2 __ovld __cnfn isordered(float2 x, float2 y);\n"
37647"int3 __ovld __cnfn isordered(float3 x, float3 y);\n"
37648"int4 __ovld __cnfn isordered(float4 x, float4 y);\n"
37649"int8 __ovld __cnfn isordered(float8 x, float8 y);\n"
37650"int16 __ovld __cnfn isordered(float16 x, float16 y);\n"
37651"#ifdef cl_khr_fp64\n"
37652"int __ovld __cnfn isordered(double x, double y);\n"
37653"long2 __ovld __cnfn isordered(double2 x, double2 y);\n"
37654"long3 __ovld __cnfn isordered(double3 x, double3 y);\n"
37655"long4 __ovld __cnfn isordered(double4 x, double4 y);\n"
37656"long8 __ovld __cnfn isordered(double8 x, double8 y);\n"
37657"long16 __ovld __cnfn isordered(double16 x, double16 y);\n"
37658"#endif //cl_khr_fp64\n"
37659"#ifdef cl_khr_fp16\n"
37660"int __ovld __cnfn isordered(half x, half y);\n"
37661"short2 __ovld __cnfn isordered(half2 x, half2 y);\n"
37662"short3 __ovld __cnfn isordered(half3 x, half3 y);\n"
37663"short4 __ovld __cnfn isordered(half4 x, half4 y);\n"
37664"short8 __ovld __cnfn isordered(half8 x, half8 y);\n"
37665"short16 __ovld __cnfn isordered(half16 x, half16 y);\n"
37666"#endif //cl_khr_fp16\n"
37667"\n"
37668"/**\n"
37669" * Test if arguments are unordered. isunordered()\n"
37670" * takes arguments x and y, returning non-zero if x or y\n"
37671" * is NaN, and zero otherwise.\n"
37672" */\n"
37673"int __ovld __cnfn isunordered(float x, float y);\n"
37674"int2 __ovld __cnfn isunordered(float2 x, float2 y);\n"
37675"int3 __ovld __cnfn isunordered(float3 x, float3 y);\n"
37676"int4 __ovld __cnfn isunordered(float4 x, float4 y);\n"
37677"int8 __ovld __cnfn isunordered(float8 x, float8 y);\n"
37678"int16 __ovld __cnfn isunordered(float16 x, float16 y);\n"
37679"#ifdef cl_khr_fp64\n"
37680"int __ovld __cnfn isunordered(double x, double y);\n"
37681"long2 __ovld __cnfn isunordered(double2 x, double2 y);\n"
37682"long3 __ovld __cnfn isunordered(double3 x, double3 y);\n"
37683"long4 __ovld __cnfn isunordered(double4 x, double4 y);\n"
37684"long8 __ovld __cnfn isunordered(double8 x, double8 y);\n"
37685"long16 __ovld __cnfn isunordered(double16 x, double16 y);\n"
37686"#endif //cl_khr_fp64\n"
37687"#ifdef cl_khr_fp16\n"
37688"int __ovld __cnfn isunordered(half x, half y);\n"
37689"short2 __ovld __cnfn isunordered(half2 x, half2 y);\n"
37690"short3 __ovld __cnfn isunordered(half3 x, half3 y);\n"
37691"short4 __ovld __cnfn isunordered(half4 x, half4 y);\n"
37692"short8 __ovld __cnfn isunordered(half8 x, half8 y);\n"
37693"short16 __ovld __cnfn isunordered(half16 x, half16 y);\n"
37694"#endif //cl_khr_fp16\n"
37695"\n"
37696"/**\n"
37697" * Test for sign bit. The scalar version of the function\n"
37698" * returns a 1 if the sign bit in the float is set else returns\n"
37699" * 0. The vector version of the function returns the\n"
37700" * following for each component in floatn: a -1 if the\n"
37701" * sign bit in the float is set else returns 0.\n"
37702" */\n"
37703"int __ovld __cnfn signbit(float);\n"
37704"int2 __ovld __cnfn signbit(float2);\n"
37705"int3 __ovld __cnfn signbit(float3);\n"
37706"int4 __ovld __cnfn signbit(float4);\n"
37707"int8 __ovld __cnfn signbit(float8);\n"
37708"int16 __ovld __cnfn signbit(float16);\n"
37709"#ifdef cl_khr_fp64\n"
37710"int __ovld __cnfn signbit(double);\n"
37711"long2 __ovld __cnfn signbit(double2);\n"
37712"long3 __ovld __cnfn signbit(double3);\n"
37713"long4 __ovld __cnfn signbit(double4);\n"
37714"long8 __ovld __cnfn signbit(double8);\n"
37715"long16 __ovld __cnfn signbit(double16);\n"
37716"#endif //cl_khr_fp64\n"
37717"#ifdef cl_khr_fp16\n"
37718"int __ovld __cnfn signbit(half);\n"
37719"short2 __ovld __cnfn signbit(half2);\n"
37720"short3 __ovld __cnfn signbit(half3);\n"
37721"short4 __ovld __cnfn signbit(half4);\n"
37722"short8 __ovld __cnfn signbit(half8);\n"
37723"short16 __ovld __cnfn signbit(half16);\n"
37724"#endif //cl_khr_fp16\n"
37725"\n"
37726"/**\n"
37727" * Returns 1 if the most significant bit in any component\n"
37728" * of x is set; otherwise returns 0.\n"
37729" */\n"
37730"int __ovld __cnfn any(char x);\n"
37731"int __ovld __cnfn any(char2 x);\n"
37732"int __ovld __cnfn any(char3 x);\n"
37733"int __ovld __cnfn any(char4 x);\n"
37734"int __ovld __cnfn any(char8 x);\n"
37735"int __ovld __cnfn any(char16 x);\n"
37736"int __ovld __cnfn any(short x);\n"
37737"int __ovld __cnfn any(short2 x);\n"
37738"int __ovld __cnfn any(short3 x);\n"
37739"int __ovld __cnfn any(short4 x);\n"
37740"int __ovld __cnfn any(short8 x);\n"
37741"int __ovld __cnfn any(short16 x);\n"
37742"int __ovld __cnfn any(int x);\n"
37743"int __ovld __cnfn any(int2 x);\n"
37744"int __ovld __cnfn any(int3 x);\n"
37745"int __ovld __cnfn any(int4 x);\n"
37746"int __ovld __cnfn any(int8 x);\n"
37747"int __ovld __cnfn any(int16 x);\n"
37748"int __ovld __cnfn any(long x);\n"
37749"int __ovld __cnfn any(long2 x);\n"
37750"int __ovld __cnfn any(long3 x);\n"
37751"int __ovld __cnfn any(long4 x);\n"
37752"int __ovld __cnfn any(long8 x);\n"
37753"int __ovld __cnfn any(long16 x);\n"
37754"\n"
37755"/**\n"
37756" * Returns 1 if the most significant bit in all components\n"
37757" * of x is set; otherwise returns 0.\n"
37758" */\n"
37759"int __ovld __cnfn all(char x);\n"
37760"int __ovld __cnfn all(char2 x);\n"
37761"int __ovld __cnfn all(char3 x);\n"
37762"int __ovld __cnfn all(char4 x);\n"
37763"int __ovld __cnfn all(char8 x);\n"
37764"int __ovld __cnfn all(char16 x);\n"
37765"int __ovld __cnfn all(short x);\n"
37766"int __ovld __cnfn all(short2 x);\n"
37767"int __ovld __cnfn all(short3 x);\n"
37768"int __ovld __cnfn all(short4 x);\n"
37769"int __ovld __cnfn all(short8 x);\n"
37770"int __ovld __cnfn all(short16 x);\n"
37771"int __ovld __cnfn all(int x);\n"
37772"int __ovld __cnfn all(int2 x);\n"
37773"int __ovld __cnfn all(int3 x);\n"
37774"int __ovld __cnfn all(int4 x);\n"
37775"int __ovld __cnfn all(int8 x);\n"
37776"int __ovld __cnfn all(int16 x);\n"
37777"int __ovld __cnfn all(long x);\n"
37778"int __ovld __cnfn all(long2 x);\n"
37779"int __ovld __cnfn all(long3 x);\n"
37780"int __ovld __cnfn all(long4 x);\n"
37781"int __ovld __cnfn all(long8 x);\n"
37782"int __ovld __cnfn all(long16 x);\n"
37783"\n"
37784"/**\n"
37785" * Each bit of the result is the corresponding bit of a if\n"
37786" * the corresponding bit of c is 0. Otherwise it is the\n"
37787" * corresponding bit of b.\n"
37788" */\n"
37789"char __ovld __cnfn bitselect(char a, char b, char c);\n"
37790"uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);\n"
37791"char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);\n"
37792"uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);\n"
37793"char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);\n"
37794"uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);\n"
37795"char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);\n"
37796"uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);\n"
37797"char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);\n"
37798"uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);\n"
37799"char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);\n"
37800"uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);\n"
37801"short __ovld __cnfn bitselect(short a, short b, short c);\n"
37802"ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);\n"
37803"short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);\n"
37804"ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);\n"
37805"short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);\n"
37806"ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);\n"
37807"short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);\n"
37808"ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);\n"
37809"short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);\n"
37810"ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);\n"
37811"short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);\n"
37812"ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);\n"
37813"int __ovld __cnfn bitselect(int a, int b, int c);\n"
37814"uint __ovld __cnfn bitselect(uint a, uint b, uint c);\n"
37815"int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);\n"
37816"uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);\n"
37817"int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);\n"
37818"uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);\n"
37819"int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);\n"
37820"uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);\n"
37821"int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);\n"
37822"uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);\n"
37823"int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);\n"
37824"uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);\n"
37825"long __ovld __cnfn bitselect(long a, long b, long c);\n"
37826"ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);\n"
37827"long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);\n"
37828"ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);\n"
37829"long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);\n"
37830"ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);\n"
37831"long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);\n"
37832"ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);\n"
37833"long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);\n"
37834"ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);\n"
37835"long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);\n"
37836"ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);\n"
37837"float __ovld __cnfn bitselect(float a, float b, float c);\n"
37838"float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);\n"
37839"float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);\n"
37840"float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);\n"
37841"float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);\n"
37842"float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);\n"
37843"#ifdef cl_khr_fp64\n"
37844"double __ovld __cnfn bitselect(double a, double b, double c);\n"
37845"double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);\n"
37846"double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);\n"
37847"double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);\n"
37848"double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);\n"
37849"double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);\n"
37850"#endif //cl_khr_fp64\n"
37851"#ifdef cl_khr_fp16\n"
37852"half __ovld __cnfn bitselect(half a, half b, half c);\n"
37853"half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);\n"
37854"half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);\n"
37855"half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);\n"
37856"half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);\n"
37857"half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);\n"
37858"#endif //cl_khr_fp16\n"
37859"\n"
37860"/**\n"
37861" * For each component of a vector type,\n"
37862" * result[i] = if MSB of c[i] is set ? b[i] : a[i].\n"
37863" * For a scalar type, result = c ? b : a.\n"
37864" * b and a must have the same type.\n"
37865" * c must have the same number of elements and bits as a.\n"
37866" */\n"
37867"char __ovld __cnfn select(char a, char b, char c);\n"
37868"uchar __ovld __cnfn select(uchar a, uchar b, char c);\n"
37869"char2 __ovld __cnfn select(char2 a, char2 b, char2 c);\n"
37870"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);\n"
37871"char3 __ovld __cnfn select(char3 a, char3 b, char3 c);\n"
37872"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);\n"
37873"char4 __ovld __cnfn select(char4 a, char4 b, char4 c);\n"
37874"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);\n"
37875"char8 __ovld __cnfn select(char8 a, char8 b, char8 c);\n"
37876"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);\n"
37877"char16 __ovld __cnfn select(char16 a, char16 b, char16 c);\n"
37878"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);\n"
37879"\n"
37880"short __ovld __cnfn select(short a, short b, short c);\n"
37881"ushort __ovld __cnfn select(ushort a, ushort b, short c);\n"
37882"short2 __ovld __cnfn select(short2 a, short2 b, short2 c);\n"
37883"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);\n"
37884"short3 __ovld __cnfn select(short3 a, short3 b, short3 c);\n"
37885"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);\n"
37886"short4 __ovld __cnfn select(short4 a, short4 b, short4 c);\n"
37887"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);\n"
37888"short8 __ovld __cnfn select(short8 a, short8 b, short8 c);\n"
37889"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);\n"
37890"short16 __ovld __cnfn select(short16 a, short16 b, short16 c);\n"
37891"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);\n"
37892"\n"
37893"int __ovld __cnfn select(int a, int b, int c);\n"
37894"uint __ovld __cnfn select(uint a, uint b, int c);\n"
37895"int2 __ovld __cnfn select(int2 a, int2 b, int2 c);\n"
37896"uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);\n"
37897"int3 __ovld __cnfn select(int3 a, int3 b, int3 c);\n"
37898"uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);\n"
37899"int4 __ovld __cnfn select(int4 a, int4 b, int4 c);\n"
37900"uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);\n"
37901"int8 __ovld __cnfn select(int8 a, int8 b, int8 c);\n"
37902"uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);\n"
37903"int16 __ovld __cnfn select(int16 a, int16 b, int16 c);\n"
37904"uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);\n"
37905"float __ovld __cnfn select(float a, float b, int c);\n"
37906"float2 __ovld __cnfn select(float2 a, float2 b, int2 c);\n"
37907"float3 __ovld __cnfn select(float3 a, float3 b, int3 c);\n"
37908"float4 __ovld __cnfn select(float4 a, float4 b, int4 c);\n"
37909"float8 __ovld __cnfn select(float8 a, float8 b, int8 c);\n"
37910"float16 __ovld __cnfn select(float16 a, float16 b, int16 c);\n"
37911"\n"
37912"long __ovld __cnfn select(long a, long b, long c);\n"
37913"ulong __ovld __cnfn select(ulong a, ulong b, long c);\n"
37914"long2 __ovld __cnfn select(long2 a, long2 b, long2 c);\n"
37915"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);\n"
37916"long3 __ovld __cnfn select(long3 a, long3 b, long3 c);\n"
37917"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);\n"
37918"long4 __ovld __cnfn select(long4 a, long4 b, long4 c);\n"
37919"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);\n"
37920"long8 __ovld __cnfn select(long8 a, long8 b, long8 c);\n"
37921"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);\n"
37922"long16 __ovld __cnfn select(long16 a, long16 b, long16 c);\n"
37923"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);\n"
37924"\n"
37925"char __ovld __cnfn select(char a, char b, uchar c);\n"
37926"uchar __ovld __cnfn select(uchar a, uchar b, uchar c);\n"
37927"char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);\n"
37928"uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);\n"
37929"char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);\n"
37930"uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);\n"
37931"char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);\n"
37932"uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);\n"
37933"char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);\n"
37934"uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);\n"
37935"char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);\n"
37936"uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);\n"
37937"\n"
37938"short __ovld __cnfn select(short a, short b, ushort c);\n"
37939"ushort __ovld __cnfn select(ushort a, ushort b, ushort c);\n"
37940"short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);\n"
37941"ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);\n"
37942"short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);\n"
37943"ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);\n"
37944"short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);\n"
37945"ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);\n"
37946"short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);\n"
37947"ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);\n"
37948"short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);\n"
37949"ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);\n"
37950"\n"
37951"int __ovld __cnfn select(int a, int b, uint c);\n"
37952"uint __ovld __cnfn select(uint a, uint b, uint c);\n"
37953"int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);\n"
37954"uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);\n"
37955"int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);\n"
37956"uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);\n"
37957"int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);\n"
37958"uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);\n"
37959"int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);\n"
37960"uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);\n"
37961"int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);\n"
37962"uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);\n"
37963"float __ovld __cnfn select(float a, float b, uint c);\n"
37964"float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);\n"
37965"float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);\n"
37966"float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);\n"
37967"float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);\n"
37968"float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);\n"
37969"\n"
37970"long __ovld __cnfn select(long a, long b, ulong c);\n"
37971"ulong __ovld __cnfn select(ulong a, ulong b, ulong c);\n"
37972"long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);\n"
37973"ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);\n"
37974"long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);\n"
37975"ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);\n"
37976"long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);\n"
37977"ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);\n"
37978"long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);\n"
37979"ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);\n"
37980"long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);\n"
37981"ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);\n"
37982"\n"
37983"#ifdef cl_khr_fp64\n"
37984"double __ovld __cnfn select(double a, double b, long c);\n"
37985"double2 __ovld __cnfn select(double2 a, double2 b, long2 c);\n"
37986"double3 __ovld __cnfn select(double3 a, double3 b, long3 c);\n"
37987"double4 __ovld __cnfn select(double4 a, double4 b, long4 c);\n"
37988"double8 __ovld __cnfn select(double8 a, double8 b, long8 c);\n"
37989"double16 __ovld __cnfn select(double16 a, double16 b, long16 c);\n"
37990"double __ovld __cnfn select(double a, double b, ulong c);\n"
37991"double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);\n"
37992"double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);\n"
37993"double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);\n"
37994"double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);\n"
37995"double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);\n"
37996"#endif //cl_khr_fp64\n"
37997"#ifdef cl_khr_fp16\n"
37998"half __ovld __cnfn select(half a, half b, short c);\n"
37999"half2 __ovld __cnfn select(half2 a, half2 b, short2 c);\n"
38000"half3 __ovld __cnfn select(half3 a, half3 b, short3 c);\n"
38001"half4 __ovld __cnfn select(half4 a, half4 b, short4 c);\n"
38002"half8 __ovld __cnfn select(half8 a, half8 b, short8 c);\n"
38003"half16 __ovld __cnfn select(half16 a, half16 b, short16 c);\n"
38004"half __ovld __cnfn select(half a, half b, ushort c);\n"
38005"half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);\n"
38006"half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);\n"
38007"half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);\n"
38008"half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);\n"
38009"half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);\n"
38010"#endif //cl_khr_fp16\n"
38011"\n"
38012"// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions\n"
38013"// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type\n"
38014"/**\n"
38015" * Use generic type gentype to indicate the built-in data types\n"
38016" * char, uchar, short, ushort, int, uint, long, ulong, float,\n"
38017" * double or half.\n"
38018" *\n"
38019" * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)).\n"
38020" *\n"
38021" * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).\n"
38022" *\n"
38023" * The address computed as (p + (offset * n)) must be\n"
38024" * 8-bit aligned if gentype is char, uchar;\n"
38025" * 16-bit aligned if gentype is short, ushort, half;\n"
38026" * 32-bit aligned if gentype is int, uint, float;\n"
38027" * 64-bit aligned if gentype is long, ulong, double.\n"
38028" */\n"
38029"\n"
38030"char2 __ovld vload2(size_t offset, const __constant char *p);\n"
38031"uchar2 __ovld vload2(size_t offset, const __constant uchar *p);\n"
38032"short2 __ovld vload2(size_t offset, const __constant short *p);\n"
38033"ushort2 __ovld vload2(size_t offset, const __constant ushort *p);\n"
38034"int2 __ovld vload2(size_t offset, const __constant int *p);\n"
38035"uint2 __ovld vload2(size_t offset, const __constant uint *p);\n"
38036"long2 __ovld vload2(size_t offset, const __constant long *p);\n"
38037"ulong2 __ovld vload2(size_t offset, const __constant ulong *p);\n"
38038"float2 __ovld vload2(size_t offset, const __constant float *p);\n"
38039"char3 __ovld vload3(size_t offset, const __constant char *p);\n"
38040"uchar3 __ovld vload3(size_t offset, const __constant uchar *p);\n"
38041"short3 __ovld vload3(size_t offset, const __constant short *p);\n"
38042"ushort3 __ovld vload3(size_t offset, const __constant ushort *p);\n"
38043"int3 __ovld vload3(size_t offset, const __constant int *p);\n"
38044"uint3 __ovld vload3(size_t offset, const __constant uint *p);\n"
38045"long3 __ovld vload3(size_t offset, const __constant long *p);\n"
38046"ulong3 __ovld vload3(size_t offset, const __constant ulong *p);\n"
38047"float3 __ovld vload3(size_t offset, const __constant float *p);\n"
38048"char4 __ovld vload4(size_t offset, const __constant char *p);\n"
38049"uchar4 __ovld vload4(size_t offset, const __constant uchar *p);\n"
38050"short4 __ovld vload4(size_t offset, const __constant short *p);\n"
38051"ushort4 __ovld vload4(size_t offset, const __constant ushort *p);\n"
38052"int4 __ovld vload4(size_t offset, const __constant int *p);\n"
38053"uint4 __ovld vload4(size_t offset, const __constant uint *p);\n"
38054"long4 __ovld vload4(size_t offset, const __constant long *p);\n"
38055"ulong4 __ovld vload4(size_t offset, const __constant ulong *p);\n"
38056"float4 __ovld vload4(size_t offset, const __constant float *p);\n"
38057"char8 __ovld vload8(size_t offset, const __constant char *p);\n"
38058"uchar8 __ovld vload8(size_t offset, const __constant uchar *p);\n"
38059"short8 __ovld vload8(size_t offset, const __constant short *p);\n"
38060"ushort8 __ovld vload8(size_t offset, const __constant ushort *p);\n"
38061"int8 __ovld vload8(size_t offset, const __constant int *p);\n"
38062"uint8 __ovld vload8(size_t offset, const __constant uint *p);\n"
38063"long8 __ovld vload8(size_t offset, const __constant long *p);\n"
38064"ulong8 __ovld vload8(size_t offset, const __constant ulong *p);\n"
38065"float8 __ovld vload8(size_t offset, const __constant float *p);\n"
38066"char16 __ovld vload16(size_t offset, const __constant char *p);\n"
38067"uchar16 __ovld vload16(size_t offset, const __constant uchar *p);\n"
38068"short16 __ovld vload16(size_t offset, const __constant short *p);\n"
38069"ushort16 __ovld vload16(size_t offset, const __constant ushort *p);\n"
38070"int16 __ovld vload16(size_t offset, const __constant int *p);\n"
38071"uint16 __ovld vload16(size_t offset, const __constant uint *p);\n"
38072"long16 __ovld vload16(size_t offset, const __constant long *p);\n"
38073"ulong16 __ovld vload16(size_t offset, const __constant ulong *p);\n"
38074"float16 __ovld vload16(size_t offset, const __constant float *p);\n"
38075"#ifdef cl_khr_fp64\n"
38076"double2 __ovld vload2(size_t offset, const __constant double *p);\n"
38077"double3 __ovld vload3(size_t offset, const __constant double *p);\n"
38078"double4 __ovld vload4(size_t offset, const __constant double *p);\n"
38079"double8 __ovld vload8(size_t offset, const __constant double *p);\n"
38080"double16 __ovld vload16(size_t offset, const __constant double *p);\n"
38081"#endif //cl_khr_fp64\n"
38082"\n"
38083"#ifdef cl_khr_fp16\n"
38084"half __ovld vload(size_t offset, const __constant half *p);\n"
38085"half2 __ovld vload2(size_t offset, const __constant half *p);\n"
38086"half3 __ovld vload3(size_t offset, const __constant half *p);\n"
38087"half4 __ovld vload4(size_t offset, const __constant half *p);\n"
38088"half8 __ovld vload8(size_t offset, const __constant half *p);\n"
38089"half16 __ovld vload16(size_t offset, const __constant half *p);\n"
38090"#endif //cl_khr_fp16\n"
38091"\n"
38092"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38093"char2 __ovld vload2(size_t offset, const char *p);\n"
38094"uchar2 __ovld vload2(size_t offset, const uchar *p);\n"
38095"short2 __ovld vload2(size_t offset, const short *p);\n"
38096"ushort2 __ovld vload2(size_t offset, const ushort *p);\n"
38097"int2 __ovld vload2(size_t offset, const int *p);\n"
38098"uint2 __ovld vload2(size_t offset, const uint *p);\n"
38099"long2 __ovld vload2(size_t offset, const long *p);\n"
38100"ulong2 __ovld vload2(size_t offset, const ulong *p);\n"
38101"float2 __ovld vload2(size_t offset, const float *p);\n"
38102"char3 __ovld vload3(size_t offset, const char *p);\n"
38103"uchar3 __ovld vload3(size_t offset, const uchar *p);\n"
38104"short3 __ovld vload3(size_t offset, const short *p);\n"
38105"ushort3 __ovld vload3(size_t offset, const ushort *p);\n"
38106"int3 __ovld vload3(size_t offset, const int *p);\n"
38107"uint3 __ovld vload3(size_t offset, const uint *p);\n"
38108"long3 __ovld vload3(size_t offset, const long *p);\n"
38109"ulong3 __ovld vload3(size_t offset, const ulong *p);\n"
38110"float3 __ovld vload3(size_t offset, const float *p);\n"
38111"char4 __ovld vload4(size_t offset, const char *p);\n"
38112"uchar4 __ovld vload4(size_t offset, const uchar *p);\n"
38113"short4 __ovld vload4(size_t offset, const short *p);\n"
38114"ushort4 __ovld vload4(size_t offset, const ushort *p);\n"
38115"int4 __ovld vload4(size_t offset, const int *p);\n"
38116"uint4 __ovld vload4(size_t offset, const uint *p);\n"
38117"long4 __ovld vload4(size_t offset, const long *p);\n"
38118"ulong4 __ovld vload4(size_t offset, const ulong *p);\n"
38119"float4 __ovld vload4(size_t offset, const float *p);\n"
38120"char8 __ovld vload8(size_t offset, const char *p);\n"
38121"uchar8 __ovld vload8(size_t offset, const uchar *p);\n"
38122"short8 __ovld vload8(size_t offset, const short *p);\n"
38123"ushort8 __ovld vload8(size_t offset, const ushort *p);\n"
38124"int8 __ovld vload8(size_t offset, const int *p);\n"
38125"uint8 __ovld vload8(size_t offset, const uint *p);\n"
38126"long8 __ovld vload8(size_t offset, const long *p);\n"
38127"ulong8 __ovld vload8(size_t offset, const ulong *p);\n"
38128"float8 __ovld vload8(size_t offset, const float *p);\n"
38129"char16 __ovld vload16(size_t offset, const char *p);\n"
38130"uchar16 __ovld vload16(size_t offset, const uchar *p);\n"
38131"short16 __ovld vload16(size_t offset, const short *p);\n"
38132"ushort16 __ovld vload16(size_t offset, const ushort *p);\n"
38133"int16 __ovld vload16(size_t offset, const int *p);\n"
38134"uint16 __ovld vload16(size_t offset, const uint *p);\n"
38135"long16 __ovld vload16(size_t offset, const long *p);\n"
38136"ulong16 __ovld vload16(size_t offset, const ulong *p);\n"
38137"float16 __ovld vload16(size_t offset, const float *p);\n"
38138"\n"
38139"#ifdef cl_khr_fp64\n"
38140"double2 __ovld vload2(size_t offset, const double *p);\n"
38141"double3 __ovld vload3(size_t offset, const double *p);\n"
38142"double4 __ovld vload4(size_t offset, const double *p);\n"
38143"double8 __ovld vload8(size_t offset, const double *p);\n"
38144"double16 __ovld vload16(size_t offset, const double *p);\n"
38145"#endif //cl_khr_fp64\n"
38146"\n"
38147"#ifdef cl_khr_fp16\n"
38148"half __ovld vload(size_t offset, const half *p);\n"
38149"half2 __ovld vload2(size_t offset, const half *p);\n"
38150"half3 __ovld vload3(size_t offset, const half *p);\n"
38151"half4 __ovld vload4(size_t offset, const half *p);\n"
38152"half8 __ovld vload8(size_t offset, const half *p);\n"
38153"half16 __ovld vload16(size_t offset, const half *p);\n"
38154"#endif //cl_khr_fp16\n"
38155"#else\n"
38156"char2 __ovld vload2(size_t offset, const __global char *p);\n"
38157"uchar2 __ovld vload2(size_t offset, const __global uchar *p);\n"
38158"short2 __ovld vload2(size_t offset, const __global short *p);\n"
38159"ushort2 __ovld vload2(size_t offset, const __global ushort *p);\n"
38160"int2 __ovld vload2(size_t offset, const __global int *p);\n"
38161"uint2 __ovld vload2(size_t offset, const __global uint *p);\n"
38162"long2 __ovld vload2(size_t offset, const __global long *p);\n"
38163"ulong2 __ovld vload2(size_t offset, const __global ulong *p);\n"
38164"float2 __ovld vload2(size_t offset, const __global float *p);\n"
38165"char3 __ovld vload3(size_t offset, const __global char *p);\n"
38166"uchar3 __ovld vload3(size_t offset, const __global uchar *p);\n"
38167"short3 __ovld vload3(size_t offset, const __global short *p);\n"
38168"ushort3 __ovld vload3(size_t offset, const __global ushort *p);\n"
38169"int3 __ovld vload3(size_t offset, const __global int *p);\n"
38170"uint3 __ovld vload3(size_t offset, const __global uint *p);\n"
38171"long3 __ovld vload3(size_t offset, const __global long *p);\n"
38172"ulong3 __ovld vload3(size_t offset, const __global ulong *p);\n"
38173"float3 __ovld vload3(size_t offset, const __global float *p);\n"
38174"char4 __ovld vload4(size_t offset, const __global char *p);\n"
38175"uchar4 __ovld vload4(size_t offset, const __global uchar *p);\n"
38176"short4 __ovld vload4(size_t offset, const __global short *p);\n"
38177"ushort4 __ovld vload4(size_t offset, const __global ushort *p);\n"
38178"int4 __ovld vload4(size_t offset, const __global int *p);\n"
38179"uint4 __ovld vload4(size_t offset, const __global uint *p);\n"
38180"long4 __ovld vload4(size_t offset, const __global long *p);\n"
38181"ulong4 __ovld vload4(size_t offset, const __global ulong *p);\n"
38182"float4 __ovld vload4(size_t offset, const __global float *p);\n"
38183"char8 __ovld vload8(size_t offset, const __global char *p);\n"
38184"uchar8 __ovld vload8(size_t offset, const __global uchar *p);\n"
38185"short8 __ovld vload8(size_t offset, const __global short *p);\n"
38186"ushort8 __ovld vload8(size_t offset, const __global ushort *p);\n"
38187"int8 __ovld vload8(size_t offset, const __global int *p);\n"
38188"uint8 __ovld vload8(size_t offset, const __global uint *p);\n"
38189"long8 __ovld vload8(size_t offset, const __global long *p);\n"
38190"ulong8 __ovld vload8(size_t offset, const __global ulong *p);\n"
38191"float8 __ovld vload8(size_t offset, const __global float *p);\n"
38192"char16 __ovld vload16(size_t offset, const __global char *p);\n"
38193"uchar16 __ovld vload16(size_t offset, const __global uchar *p);\n"
38194"short16 __ovld vload16(size_t offset, const __global short *p);\n"
38195"ushort16 __ovld vload16(size_t offset, const __global ushort *p);\n"
38196"int16 __ovld vload16(size_t offset, const __global int *p);\n"
38197"uint16 __ovld vload16(size_t offset, const __global uint *p);\n"
38198"long16 __ovld vload16(size_t offset, const __global long *p);\n"
38199"ulong16 __ovld vload16(size_t offset, const __global ulong *p);\n"
38200"float16 __ovld vload16(size_t offset, const __global float *p);\n"
38201"char2 __ovld vload2(size_t offset, const __local char *p);\n"
38202"uchar2 __ovld vload2(size_t offset, const __local uchar *p);\n"
38203"short2 __ovld vload2(size_t offset, const __local short *p);\n"
38204"ushort2 __ovld vload2(size_t offset, const __local ushort *p);\n"
38205"int2 __ovld vload2(size_t offset, const __local int *p);\n"
38206"uint2 __ovld vload2(size_t offset, const __local uint *p);\n"
38207"long2 __ovld vload2(size_t offset, const __local long *p);\n"
38208"ulong2 __ovld vload2(size_t offset, const __local ulong *p);\n"
38209"float2 __ovld vload2(size_t offset, const __local float *p);\n"
38210"char3 __ovld vload3(size_t offset, const __local char *p);\n"
38211"uchar3 __ovld vload3(size_t offset, const __local uchar *p);\n"
38212"short3 __ovld vload3(size_t offset, const __local short *p);\n"
38213"ushort3 __ovld vload3(size_t offset, const __local ushort *p);\n"
38214"int3 __ovld vload3(size_t offset, const __local int *p);\n"
38215"uint3 __ovld vload3(size_t offset, const __local uint *p);\n"
38216"long3 __ovld vload3(size_t offset, const __local long *p);\n"
38217"ulong3 __ovld vload3(size_t offset, const __local ulong *p);\n"
38218"float3 __ovld vload3(size_t offset, const __local float *p);\n"
38219"char4 __ovld vload4(size_t offset, const __local char *p);\n"
38220"uchar4 __ovld vload4(size_t offset, const __local uchar *p);\n"
38221"short4 __ovld vload4(size_t offset, const __local short *p);\n"
38222"ushort4 __ovld vload4(size_t offset, const __local ushort *p);\n"
38223"int4 __ovld vload4(size_t offset, const __local int *p);\n"
38224"uint4 __ovld vload4(size_t offset, const __local uint *p);\n"
38225"long4 __ovld vload4(size_t offset, const __local long *p);\n"
38226"ulong4 __ovld vload4(size_t offset, const __local ulong *p);\n"
38227"float4 __ovld vload4(size_t offset, const __local float *p);\n"
38228"char8 __ovld vload8(size_t offset, const __local char *p);\n"
38229"uchar8 __ovld vload8(size_t offset, const __local uchar *p);\n"
38230"short8 __ovld vload8(size_t offset, const __local short *p);\n"
38231"ushort8 __ovld vload8(size_t offset, const __local ushort *p);\n"
38232"int8 __ovld vload8(size_t offset, const __local int *p);\n"
38233"uint8 __ovld vload8(size_t offset, const __local uint *p);\n"
38234"long8 __ovld vload8(size_t offset, const __local long *p);\n"
38235"ulong8 __ovld vload8(size_t offset, const __local ulong *p);\n"
38236"float8 __ovld vload8(size_t offset, const __local float *p);\n"
38237"char16 __ovld vload16(size_t offset, const __local char *p);\n"
38238"uchar16 __ovld vload16(size_t offset, const __local uchar *p);\n"
38239"short16 __ovld vload16(size_t offset, const __local short *p);\n"
38240"ushort16 __ovld vload16(size_t offset, const __local ushort *p);\n"
38241"int16 __ovld vload16(size_t offset, const __local int *p);\n"
38242"uint16 __ovld vload16(size_t offset, const __local uint *p);\n"
38243"long16 __ovld vload16(size_t offset, const __local long *p);\n"
38244"ulong16 __ovld vload16(size_t offset, const __local ulong *p);\n"
38245"float16 __ovld vload16(size_t offset, const __local float *p);\n"
38246"char2 __ovld vload2(size_t offset, const __private char *p);\n"
38247"uchar2 __ovld vload2(size_t offset, const __private uchar *p);\n"
38248"short2 __ovld vload2(size_t offset, const __private short *p);\n"
38249"ushort2 __ovld vload2(size_t offset, const __private ushort *p);\n"
38250"int2 __ovld vload2(size_t offset, const __private int *p);\n"
38251"uint2 __ovld vload2(size_t offset, const __private uint *p);\n"
38252"long2 __ovld vload2(size_t offset, const __private long *p);\n"
38253"ulong2 __ovld vload2(size_t offset, const __private ulong *p);\n"
38254"float2 __ovld vload2(size_t offset, const __private float *p);\n"
38255"char3 __ovld vload3(size_t offset, const __private char *p);\n"
38256"uchar3 __ovld vload3(size_t offset, const __private uchar *p);\n"
38257"short3 __ovld vload3(size_t offset, const __private short *p);\n"
38258"ushort3 __ovld vload3(size_t offset, const __private ushort *p);\n"
38259"int3 __ovld vload3(size_t offset, const __private int *p);\n"
38260"uint3 __ovld vload3(size_t offset, const __private uint *p);\n"
38261"long3 __ovld vload3(size_t offset, const __private long *p);\n"
38262"ulong3 __ovld vload3(size_t offset, const __private ulong *p);\n"
38263"float3 __ovld vload3(size_t offset, const __private float *p);\n"
38264"char4 __ovld vload4(size_t offset, const __private char *p);\n"
38265"uchar4 __ovld vload4(size_t offset, const __private uchar *p);\n"
38266"short4 __ovld vload4(size_t offset, const __private short *p);\n"
38267"ushort4 __ovld vload4(size_t offset, const __private ushort *p);\n"
38268"int4 __ovld vload4(size_t offset, const __private int *p);\n"
38269"uint4 __ovld vload4(size_t offset, const __private uint *p);\n"
38270"long4 __ovld vload4(size_t offset, const __private long *p);\n"
38271"ulong4 __ovld vload4(size_t offset, const __private ulong *p);\n"
38272"float4 __ovld vload4(size_t offset, const __private float *p);\n"
38273"char8 __ovld vload8(size_t offset, const __private char *p);\n"
38274"uchar8 __ovld vload8(size_t offset, const __private uchar *p);\n"
38275"short8 __ovld vload8(size_t offset, const __private short *p);\n"
38276"ushort8 __ovld vload8(size_t offset, const __private ushort *p);\n"
38277"int8 __ovld vload8(size_t offset, const __private int *p);\n"
38278"uint8 __ovld vload8(size_t offset, const __private uint *p);\n"
38279"long8 __ovld vload8(size_t offset, const __private long *p);\n"
38280"ulong8 __ovld vload8(size_t offset, const __private ulong *p);\n"
38281"float8 __ovld vload8(size_t offset, const __private float *p);\n"
38282"char16 __ovld vload16(size_t offset, const __private char *p);\n"
38283"uchar16 __ovld vload16(size_t offset, const __private uchar *p);\n"
38284"short16 __ovld vload16(size_t offset, const __private short *p);\n"
38285"ushort16 __ovld vload16(size_t offset, const __private ushort *p);\n"
38286"int16 __ovld vload16(size_t offset, const __private int *p);\n"
38287"uint16 __ovld vload16(size_t offset, const __private uint *p);\n"
38288"long16 __ovld vload16(size_t offset, const __private long *p);\n"
38289"ulong16 __ovld vload16(size_t offset, const __private ulong *p);\n"
38290"float16 __ovld vload16(size_t offset, const __private float *p);\n"
38291"\n"
38292"#ifdef cl_khr_fp64\n"
38293"double2 __ovld vload2(size_t offset, const __global double *p);\n"
38294"double3 __ovld vload3(size_t offset, const __global double *p);\n"
38295"double4 __ovld vload4(size_t offset, const __global double *p);\n"
38296"double8 __ovld vload8(size_t offset, const __global double *p);\n"
38297"double16 __ovld vload16(size_t offset, const __global double *p);\n"
38298"double2 __ovld vload2(size_t offset, const __local double *p);\n"
38299"double3 __ovld vload3(size_t offset, const __local double *p);\n"
38300"double4 __ovld vload4(size_t offset, const __local double *p);\n"
38301"double8 __ovld vload8(size_t offset, const __local double *p);\n"
38302"double16 __ovld vload16(size_t offset, const __local double *p);\n"
38303"double2 __ovld vload2(size_t offset, const __private double *p);\n"
38304"double3 __ovld vload3(size_t offset, const __private double *p);\n"
38305"double4 __ovld vload4(size_t offset, const __private double *p);\n"
38306"double8 __ovld vload8(size_t offset, const __private double *p);\n"
38307"double16 __ovld vload16(size_t offset, const __private double *p);\n"
38308"#endif //cl_khr_fp64\n"
38309"\n"
38310"#ifdef cl_khr_fp16\n"
38311"half __ovld vload(size_t offset, const __global half *p);\n"
38312"half2 __ovld vload2(size_t offset, const __global half *p);\n"
38313"half3 __ovld vload3(size_t offset, const __global half *p);\n"
38314"half4 __ovld vload4(size_t offset, const __global half *p);\n"
38315"half8 __ovld vload8(size_t offset, const __global half *p);\n"
38316"half16 __ovld vload16(size_t offset, const __global half *p);\n"
38317"half __ovld vload(size_t offset, const __local half *p);\n"
38318"half2 __ovld vload2(size_t offset, const __local half *p);\n"
38319"half3 __ovld vload3(size_t offset, const __local half *p);\n"
38320"half4 __ovld vload4(size_t offset, const __local half *p);\n"
38321"half8 __ovld vload8(size_t offset, const __local half *p);\n"
38322"half16 __ovld vload16(size_t offset, const __local half *p);\n"
38323"half __ovld vload(size_t offset, const __private half *p);\n"
38324"half2 __ovld vload2(size_t offset, const __private half *p);\n"
38325"half3 __ovld vload3(size_t offset, const __private half *p);\n"
38326"half4 __ovld vload4(size_t offset, const __private half *p);\n"
38327"half8 __ovld vload8(size_t offset, const __private half *p);\n"
38328"half16 __ovld vload16(size_t offset, const __private half *p);\n"
38329"#endif //cl_khr_fp16\n"
38330"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38331"\n"
38332"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38333"void __ovld vstore2(char2 data, size_t offset, char *p);\n"
38334"void __ovld vstore2(uchar2 data, size_t offset, uchar *p);\n"
38335"void __ovld vstore2(short2 data, size_t offset, short *p);\n"
38336"void __ovld vstore2(ushort2 data, size_t offset, ushort *p);\n"
38337"void __ovld vstore2(int2 data, size_t offset, int *p);\n"
38338"void __ovld vstore2(uint2 data, size_t offset, uint *p);\n"
38339"void __ovld vstore2(long2 data, size_t offset, long *p);\n"
38340"void __ovld vstore2(ulong2 data, size_t offset, ulong *p);\n"
38341"void __ovld vstore2(float2 data, size_t offset, float *p);\n"
38342"void __ovld vstore3(char3 data, size_t offset, char *p);\n"
38343"void __ovld vstore3(uchar3 data, size_t offset, uchar *p);\n"
38344"void __ovld vstore3(short3 data, size_t offset, short *p);\n"
38345"void __ovld vstore3(ushort3 data, size_t offset, ushort *p);\n"
38346"void __ovld vstore3(int3 data, size_t offset, int *p);\n"
38347"void __ovld vstore3(uint3 data, size_t offset, uint *p);\n"
38348"void __ovld vstore3(long3 data, size_t offset, long *p);\n"
38349"void __ovld vstore3(ulong3 data, size_t offset, ulong *p);\n"
38350"void __ovld vstore3(float3 data, size_t offset, float *p);\n"
38351"void __ovld vstore4(char4 data, size_t offset, char *p);\n"
38352"void __ovld vstore4(uchar4 data, size_t offset, uchar *p);\n"
38353"void __ovld vstore4(short4 data, size_t offset, short *p);\n"
38354"void __ovld vstore4(ushort4 data, size_t offset, ushort *p);\n"
38355"void __ovld vstore4(int4 data, size_t offset, int *p);\n"
38356"void __ovld vstore4(uint4 data, size_t offset, uint *p);\n"
38357"void __ovld vstore4(long4 data, size_t offset, long *p);\n"
38358"void __ovld vstore4(ulong4 data, size_t offset, ulong *p);\n"
38359"void __ovld vstore4(float4 data, size_t offset, float *p);\n"
38360"void __ovld vstore8(char8 data, size_t offset, char *p);\n"
38361"void __ovld vstore8(uchar8 data, size_t offset, uchar *p);\n"
38362"void __ovld vstore8(short8 data, size_t offset, short *p);\n"
38363"void __ovld vstore8(ushort8 data, size_t offset, ushort *p);\n"
38364"void __ovld vstore8(int8 data, size_t offset, int *p);\n"
38365"void __ovld vstore8(uint8 data, size_t offset, uint *p);\n"
38366"void __ovld vstore8(long8 data, size_t offset, long *p);\n"
38367"void __ovld vstore8(ulong8 data, size_t offset, ulong *p);\n"
38368"void __ovld vstore8(float8 data, size_t offset, float *p);\n"
38369"void __ovld vstore16(char16 data, size_t offset, char *p);\n"
38370"void __ovld vstore16(uchar16 data, size_t offset, uchar *p);\n"
38371"void __ovld vstore16(short16 data, size_t offset, short *p);\n"
38372"void __ovld vstore16(ushort16 data, size_t offset, ushort *p);\n"
38373"void __ovld vstore16(int16 data, size_t offset, int *p);\n"
38374"void __ovld vstore16(uint16 data, size_t offset, uint *p);\n"
38375"void __ovld vstore16(long16 data, size_t offset, long *p);\n"
38376"void __ovld vstore16(ulong16 data, size_t offset, ulong *p);\n"
38377"void __ovld vstore16(float16 data, size_t offset, float *p);\n"
38378"#ifdef cl_khr_fp64\n"
38379"void __ovld vstore2(double2 data, size_t offset, double *p);\n"
38380"void __ovld vstore3(double3 data, size_t offset, double *p);\n"
38381"void __ovld vstore4(double4 data, size_t offset, double *p);\n"
38382"void __ovld vstore8(double8 data, size_t offset, double *p);\n"
38383"void __ovld vstore16(double16 data, size_t offset, double *p);\n"
38384"#endif //cl_khr_fp64\n"
38385"#ifdef cl_khr_fp16\n"
38386"void __ovld vstore(half data, size_t offset, half *p);\n"
38387"void __ovld vstore2(half2 data, size_t offset, half *p);\n"
38388"void __ovld vstore3(half3 data, size_t offset, half *p);\n"
38389"void __ovld vstore4(half4 data, size_t offset, half *p);\n"
38390"void __ovld vstore8(half8 data, size_t offset, half *p);\n"
38391"void __ovld vstore16(half16 data, size_t offset, half *p);\n"
38392"#endif //cl_khr_fp16\n"
38393"#else\n"
38394"void __ovld vstore2(char2 data, size_t offset, __global char *p);\n"
38395"void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);\n"
38396"void __ovld vstore2(short2 data, size_t offset, __global short *p);\n"
38397"void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);\n"
38398"void __ovld vstore2(int2 data, size_t offset, __global int *p);\n"
38399"void __ovld vstore2(uint2 data, size_t offset, __global uint *p);\n"
38400"void __ovld vstore2(long2 data, size_t offset, __global long *p);\n"
38401"void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);\n"
38402"void __ovld vstore2(float2 data, size_t offset, __global float *p);\n"
38403"void __ovld vstore3(char3 data, size_t offset, __global char *p);\n"
38404"void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);\n"
38405"void __ovld vstore3(short3 data, size_t offset, __global short *p);\n"
38406"void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);\n"
38407"void __ovld vstore3(int3 data, size_t offset, __global int *p);\n"
38408"void __ovld vstore3(uint3 data, size_t offset, __global uint *p);\n"
38409"void __ovld vstore3(long3 data, size_t offset, __global long *p);\n"
38410"void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);\n"
38411"void __ovld vstore3(float3 data, size_t offset, __global float *p);\n"
38412"void __ovld vstore4(char4 data, size_t offset, __global char *p);\n"
38413"void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);\n"
38414"void __ovld vstore4(short4 data, size_t offset, __global short *p);\n"
38415"void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);\n"
38416"void __ovld vstore4(int4 data, size_t offset, __global int *p);\n"
38417"void __ovld vstore4(uint4 data, size_t offset, __global uint *p);\n"
38418"void __ovld vstore4(long4 data, size_t offset, __global long *p);\n"
38419"void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);\n"
38420"void __ovld vstore4(float4 data, size_t offset, __global float *p);\n"
38421"void __ovld vstore8(char8 data, size_t offset, __global char *p);\n"
38422"void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);\n"
38423"void __ovld vstore8(short8 data, size_t offset, __global short *p);\n"
38424"void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);\n"
38425"void __ovld vstore8(int8 data, size_t offset, __global int *p);\n"
38426"void __ovld vstore8(uint8 data, size_t offset, __global uint *p);\n"
38427"void __ovld vstore8(long8 data, size_t offset, __global long *p);\n"
38428"void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);\n"
38429"void __ovld vstore8(float8 data, size_t offset, __global float *p);\n"
38430"void __ovld vstore16(char16 data, size_t offset, __global char *p);\n"
38431"void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);\n"
38432"void __ovld vstore16(short16 data, size_t offset, __global short *p);\n"
38433"void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);\n"
38434"void __ovld vstore16(int16 data, size_t offset, __global int *p);\n"
38435"void __ovld vstore16(uint16 data, size_t offset, __global uint *p);\n"
38436"void __ovld vstore16(long16 data, size_t offset, __global long *p);\n"
38437"void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);\n"
38438"void __ovld vstore16(float16 data, size_t offset, __global float *p);\n"
38439"void __ovld vstore2(char2 data, size_t offset, __local char *p);\n"
38440"void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);\n"
38441"void __ovld vstore2(short2 data, size_t offset, __local short *p);\n"
38442"void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);\n"
38443"void __ovld vstore2(int2 data, size_t offset, __local int *p);\n"
38444"void __ovld vstore2(uint2 data, size_t offset, __local uint *p);\n"
38445"void __ovld vstore2(long2 data, size_t offset, __local long *p);\n"
38446"void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);\n"
38447"void __ovld vstore2(float2 data, size_t offset, __local float *p);\n"
38448"void __ovld vstore3(char3 data, size_t offset, __local char *p);\n"
38449"void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);\n"
38450"void __ovld vstore3(short3 data, size_t offset, __local short *p);\n"
38451"void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);\n"
38452"void __ovld vstore3(int3 data, size_t offset, __local int *p);\n"
38453"void __ovld vstore3(uint3 data, size_t offset, __local uint *p);\n"
38454"void __ovld vstore3(long3 data, size_t offset, __local long *p);\n"
38455"void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);\n"
38456"void __ovld vstore3(float3 data, size_t offset, __local float *p);\n"
38457"void __ovld vstore4(char4 data, size_t offset, __local char *p);\n"
38458"void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);\n"
38459"void __ovld vstore4(short4 data, size_t offset, __local short *p);\n"
38460"void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);\n"
38461"void __ovld vstore4(int4 data, size_t offset, __local int *p);\n"
38462"void __ovld vstore4(uint4 data, size_t offset, __local uint *p);\n"
38463"void __ovld vstore4(long4 data, size_t offset, __local long *p);\n"
38464"void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);\n"
38465"void __ovld vstore4(float4 data, size_t offset, __local float *p);\n"
38466"void __ovld vstore8(char8 data, size_t offset, __local char *p);\n"
38467"void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);\n"
38468"void __ovld vstore8(short8 data, size_t offset, __local short *p);\n"
38469"void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);\n"
38470"void __ovld vstore8(int8 data, size_t offset, __local int *p);\n"
38471"void __ovld vstore8(uint8 data, size_t offset, __local uint *p);\n"
38472"void __ovld vstore8(long8 data, size_t offset, __local long *p);\n"
38473"void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);\n"
38474"void __ovld vstore8(float8 data, size_t offset, __local float *p);\n"
38475"void __ovld vstore16(char16 data, size_t offset, __local char *p);\n"
38476"void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);\n"
38477"void __ovld vstore16(short16 data, size_t offset, __local short *p);\n"
38478"void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);\n"
38479"void __ovld vstore16(int16 data, size_t offset, __local int *p);\n"
38480"void __ovld vstore16(uint16 data, size_t offset, __local uint *p);\n"
38481"void __ovld vstore16(long16 data, size_t offset, __local long *p);\n"
38482"void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);\n"
38483"void __ovld vstore16(float16 data, size_t offset, __local float *p);\n"
38484"void __ovld vstore2(char2 data, size_t offset, __private char *p);\n"
38485"void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);\n"
38486"void __ovld vstore2(short2 data, size_t offset, __private short *p);\n"
38487"void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);\n"
38488"void __ovld vstore2(int2 data, size_t offset, __private int *p);\n"
38489"void __ovld vstore2(uint2 data, size_t offset, __private uint *p);\n"
38490"void __ovld vstore2(long2 data, size_t offset, __private long *p);\n"
38491"void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);\n"
38492"void __ovld vstore2(float2 data, size_t offset, __private float *p);\n"
38493"void __ovld vstore3(char3 data, size_t offset, __private char *p);\n"
38494"void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);\n"
38495"void __ovld vstore3(short3 data, size_t offset, __private short *p);\n"
38496"void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);\n"
38497"void __ovld vstore3(int3 data, size_t offset, __private int *p);\n"
38498"void __ovld vstore3(uint3 data, size_t offset, __private uint *p);\n"
38499"void __ovld vstore3(long3 data, size_t offset, __private long *p);\n"
38500"void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);\n"
38501"void __ovld vstore3(float3 data, size_t offset, __private float *p);\n"
38502"void __ovld vstore4(char4 data, size_t offset, __private char *p);\n"
38503"void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);\n"
38504"void __ovld vstore4(short4 data, size_t offset, __private short *p);\n"
38505"void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);\n"
38506"void __ovld vstore4(int4 data, size_t offset, __private int *p);\n"
38507"void __ovld vstore4(uint4 data, size_t offset, __private uint *p);\n"
38508"void __ovld vstore4(long4 data, size_t offset, __private long *p);\n"
38509"void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);\n"
38510"void __ovld vstore4(float4 data, size_t offset, __private float *p);\n"
38511"void __ovld vstore8(char8 data, size_t offset, __private char *p);\n"
38512"void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);\n"
38513"void __ovld vstore8(short8 data, size_t offset, __private short *p);\n"
38514"void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);\n"
38515"void __ovld vstore8(int8 data, size_t offset, __private int *p);\n"
38516"void __ovld vstore8(uint8 data, size_t offset, __private uint *p);\n"
38517"void __ovld vstore8(long8 data, size_t offset, __private long *p);\n"
38518"void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);\n"
38519"void __ovld vstore8(float8 data, size_t offset, __private float *p);\n"
38520"void __ovld vstore16(char16 data, size_t offset, __private char *p);\n"
38521"void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);\n"
38522"void __ovld vstore16(short16 data, size_t offset, __private short *p);\n"
38523"void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);\n"
38524"void __ovld vstore16(int16 data, size_t offset, __private int *p);\n"
38525"void __ovld vstore16(uint16 data, size_t offset, __private uint *p);\n"
38526"void __ovld vstore16(long16 data, size_t offset, __private long *p);\n"
38527"void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);\n"
38528"void __ovld vstore16(float16 data, size_t offset, __private float *p);\n"
38529"#ifdef cl_khr_fp64\n"
38530"void __ovld vstore2(double2 data, size_t offset, __global double *p);\n"
38531"void __ovld vstore3(double3 data, size_t offset, __global double *p);\n"
38532"void __ovld vstore4(double4 data, size_t offset, __global double *p);\n"
38533"void __ovld vstore8(double8 data, size_t offset, __global double *p);\n"
38534"void __ovld vstore16(double16 data, size_t offset, __global double *p);\n"
38535"void __ovld vstore2(double2 data, size_t offset, __local double *p);\n"
38536"void __ovld vstore3(double3 data, size_t offset, __local double *p);\n"
38537"void __ovld vstore4(double4 data, size_t offset, __local double *p);\n"
38538"void __ovld vstore8(double8 data, size_t offset, __local double *p);\n"
38539"void __ovld vstore16(double16 data, size_t offset, __local double *p);\n"
38540"void __ovld vstore2(double2 data, size_t offset, __private double *p);\n"
38541"void __ovld vstore3(double3 data, size_t offset, __private double *p);\n"
38542"void __ovld vstore4(double4 data, size_t offset, __private double *p);\n"
38543"void __ovld vstore8(double8 data, size_t offset, __private double *p);\n"
38544"void __ovld vstore16(double16 data, size_t offset, __private double *p);\n"
38545"#endif //cl_khr_fp64\n"
38546"#ifdef cl_khr_fp16\n"
38547"void __ovld vstore(half data, size_t offset, __global half *p);\n"
38548"void __ovld vstore2(half2 data, size_t offset, __global half *p);\n"
38549"void __ovld vstore3(half3 data, size_t offset, __global half *p);\n"
38550"void __ovld vstore4(half4 data, size_t offset, __global half *p);\n"
38551"void __ovld vstore8(half8 data, size_t offset, __global half *p);\n"
38552"void __ovld vstore16(half16 data, size_t offset, __global half *p);\n"
38553"void __ovld vstore(half data, size_t offset, __local half *p);\n"
38554"void __ovld vstore2(half2 data, size_t offset, __local half *p);\n"
38555"void __ovld vstore3(half3 data, size_t offset, __local half *p);\n"
38556"void __ovld vstore4(half4 data, size_t offset, __local half *p);\n"
38557"void __ovld vstore8(half8 data, size_t offset, __local half *p);\n"
38558"void __ovld vstore16(half16 data, size_t offset, __local half *p);\n"
38559"void __ovld vstore(half data, size_t offset, __private half *p);\n"
38560"void __ovld vstore2(half2 data, size_t offset, __private half *p);\n"
38561"void __ovld vstore3(half3 data, size_t offset, __private half *p);\n"
38562"void __ovld vstore4(half4 data, size_t offset, __private half *p);\n"
38563"void __ovld vstore8(half8 data, size_t offset, __private half *p);\n"
38564"void __ovld vstore16(half16 data, size_t offset, __private half *p);\n"
38565"#endif //cl_khr_fp16\n"
38566"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38567"\n"
38568"/**\n"
38569" * Read sizeof (half) bytes of data from address\n"
38570" * (p + offset). The data read is interpreted as a\n"
38571" * half value. The half value is converted to a\n"
38572" * float value and the float value is returned.\n"
38573" * The read address computed as (p + offset)\n"
38574" * must be 16-bit aligned.\n"
38575" */\n"
38576"float __ovld vload_half(size_t offset, const __constant half *p);\n"
38577"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38578"float __ovld vload_half(size_t offset, const half *p);\n"
38579"#else\n"
38580"float __ovld vload_half(size_t offset, const __global half *p);\n"
38581"float __ovld vload_half(size_t offset, const __local half *p);\n"
38582"float __ovld vload_half(size_t offset, const __private half *p);\n"
38583"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38584"\n"
38585"/**\n"
38586" * Read sizeof (halfn) bytes of data from address\n"
38587" * (p + (offset * n)). The data read is interpreted\n"
38588" * as a halfn value. The halfn value read is\n"
38589" * converted to a floatn value and the floatn\n"
38590" * value is returned. The read address computed\n"
38591" * as (p + (offset * n)) must be 16-bit aligned.\n"
38592" */\n"
38593"float2 __ovld vload_half2(size_t offset, const __constant half *p);\n"
38594"float3 __ovld vload_half3(size_t offset, const __constant half *p);\n"
38595"float4 __ovld vload_half4(size_t offset, const __constant half *p);\n"
38596"float8 __ovld vload_half8(size_t offset, const __constant half *p);\n"
38597"float16 __ovld vload_half16(size_t offset, const __constant half *p);\n"
38598"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38599"float2 __ovld vload_half2(size_t offset, const half *p);\n"
38600"float3 __ovld vload_half3(size_t offset, const half *p);\n"
38601"float4 __ovld vload_half4(size_t offset, const half *p);\n"
38602"float8 __ovld vload_half8(size_t offset, const half *p);\n"
38603"float16 __ovld vload_half16(size_t offset, const half *p);\n"
38604"#else\n"
38605"float2 __ovld vload_half2(size_t offset, const __global half *p);\n"
38606"float3 __ovld vload_half3(size_t offset, const __global half *p);\n"
38607"float4 __ovld vload_half4(size_t offset, const __global half *p);\n"
38608"float8 __ovld vload_half8(size_t offset, const __global half *p);\n"
38609"float16 __ovld vload_half16(size_t offset, const __global half *p);\n"
38610"float2 __ovld vload_half2(size_t offset, const __local half *p);\n"
38611"float3 __ovld vload_half3(size_t offset, const __local half *p);\n"
38612"float4 __ovld vload_half4(size_t offset, const __local half *p);\n"
38613"float8 __ovld vload_half8(size_t offset, const __local half *p);\n"
38614"float16 __ovld vload_half16(size_t offset, const __local half *p);\n"
38615"float2 __ovld vload_half2(size_t offset, const __private half *p);\n"
38616"float3 __ovld vload_half3(size_t offset, const __private half *p);\n"
38617"float4 __ovld vload_half4(size_t offset, const __private half *p);\n"
38618"float8 __ovld vload_half8(size_t offset, const __private half *p);\n"
38619"float16 __ovld vload_half16(size_t offset, const __private half *p);\n"
38620"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38621"\n"
38622"/**\n"
38623" * The float value given by data is first\n"
38624" * converted to a half value using the appropriate\n"
38625" * rounding mode. The half value is then written\n"
38626" * to address computed as (p + offset). The\n"
38627" * address computed as (p + offset) must be 16-\n"
38628" * bit aligned.\n"
38629" * vstore_half use the current rounding mode.\n"
38630" * The default current rounding mode is round to\n"
38631" * nearest even.\n"
38632" */\n"
38633"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38634"void __ovld vstore_half(float data, size_t offset, half *p);\n"
38635"void __ovld vstore_half_rte(float data, size_t offset, half *p);\n"
38636"void __ovld vstore_half_rtz(float data, size_t offset, half *p);\n"
38637"void __ovld vstore_half_rtp(float data, size_t offset, half *p);\n"
38638"void __ovld vstore_half_rtn(float data, size_t offset, half *p);\n"
38639"#ifdef cl_khr_fp64\n"
38640"void __ovld vstore_half(double data, size_t offset, half *p);\n"
38641"void __ovld vstore_half_rte(double data, size_t offset, half *p);\n"
38642"void __ovld vstore_half_rtz(double data, size_t offset, half *p);\n"
38643"void __ovld vstore_half_rtp(double data, size_t offset, half *p);\n"
38644"void __ovld vstore_half_rtn(double data, size_t offset, half *p);\n"
38645"#endif //cl_khr_fp64\n"
38646"#else\n"
38647"void __ovld vstore_half(float data, size_t offset, __global half *p);\n"
38648"void __ovld vstore_half_rte(float data, size_t offset, __global half *p);\n"
38649"void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);\n"
38650"void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);\n"
38651"void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);\n"
38652"void __ovld vstore_half(float data, size_t offset, __local half *p);\n"
38653"void __ovld vstore_half_rte(float data, size_t offset, __local half *p);\n"
38654"void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);\n"
38655"void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);\n"
38656"void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);\n"
38657"void __ovld vstore_half(float data, size_t offset, __private half *p);\n"
38658"void __ovld vstore_half_rte(float data, size_t offset, __private half *p);\n"
38659"void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);\n"
38660"void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);\n"
38661"void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);\n"
38662"#ifdef cl_khr_fp64\n"
38663"void __ovld vstore_half(double data, size_t offset, __global half *p);\n"
38664"void __ovld vstore_half_rte(double data, size_t offset, __global half *p);\n"
38665"void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);\n"
38666"void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);\n"
38667"void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);\n"
38668"void __ovld vstore_half(double data, size_t offset, __local half *p);\n"
38669"void __ovld vstore_half_rte(double data, size_t offset, __local half *p);\n"
38670"void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);\n"
38671"void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);\n"
38672"void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);\n"
38673"void __ovld vstore_half(double data, size_t offset, __private half *p);\n"
38674"void __ovld vstore_half_rte(double data, size_t offset, __private half *p);\n"
38675"void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);\n"
38676"void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);\n"
38677"void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);\n"
38678"#endif //cl_khr_fp64\n"
38679"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38680"\n"
38681"/**\n"
38682" * The floatn value given by data is converted to\n"
38683" * a halfn value using the appropriate rounding\n"
38684" * mode. The halfn value is then written to\n"
38685" * address computed as (p + (offset * n)). The\n"
38686" * address computed as (p + (offset * n)) must be\n"
38687" * 16-bit aligned.\n"
38688" * vstore_halfn uses the current rounding mode.\n"
38689" * The default current rounding mode is round to\n"
38690" * nearest even.\n"
38691" */\n"
38692"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38693"void __ovld vstore_half2(float2 data, size_t offset, half *p);\n"
38694"void __ovld vstore_half3(float3 data, size_t offset, half *p);\n"
38695"void __ovld vstore_half4(float4 data, size_t offset, half *p);\n"
38696"void __ovld vstore_half8(float8 data, size_t offset, half *p);\n"
38697"void __ovld vstore_half16(float16 data, size_t offset, half *p);\n"
38698"void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);\n"
38699"void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);\n"
38700"void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);\n"
38701"void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);\n"
38702"void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);\n"
38703"void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);\n"
38704"void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);\n"
38705"void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);\n"
38706"void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);\n"
38707"void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);\n"
38708"void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);\n"
38709"void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);\n"
38710"void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);\n"
38711"void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);\n"
38712"void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);\n"
38713"void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);\n"
38714"void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);\n"
38715"void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);\n"
38716"void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);\n"
38717"void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);\n"
38718"#ifdef cl_khr_fp64\n"
38719"void __ovld vstore_half2(double2 data, size_t offset, half *p);\n"
38720"void __ovld vstore_half3(double3 data, size_t offset, half *p);\n"
38721"void __ovld vstore_half4(double4 data, size_t offset, half *p);\n"
38722"void __ovld vstore_half8(double8 data, size_t offset, half *p);\n"
38723"void __ovld vstore_half16(double16 data, size_t offset, half *p);\n"
38724"void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);\n"
38725"void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);\n"
38726"void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);\n"
38727"void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);\n"
38728"void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);\n"
38729"void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);\n"
38730"void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);\n"
38731"void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);\n"
38732"void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);\n"
38733"void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);\n"
38734"void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);\n"
38735"void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);\n"
38736"void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);\n"
38737"void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);\n"
38738"void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);\n"
38739"void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);\n"
38740"void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);\n"
38741"void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);\n"
38742"void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);\n"
38743"void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);\n"
38744"#endif //cl_khr_fp64\n"
38745"#else\n"
38746"void __ovld vstore_half2(float2 data, size_t offset, __global half *p);\n"
38747"void __ovld vstore_half3(float3 data, size_t offset, __global half *p);\n"
38748"void __ovld vstore_half4(float4 data, size_t offset, __global half *p);\n"
38749"void __ovld vstore_half8(float8 data, size_t offset, __global half *p);\n"
38750"void __ovld vstore_half16(float16 data, size_t offset, __global half *p);\n"
38751"void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);\n"
38752"void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);\n"
38753"void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);\n"
38754"void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);\n"
38755"void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);\n"
38756"void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);\n"
38757"void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);\n"
38758"void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);\n"
38759"void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);\n"
38760"void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);\n"
38761"void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);\n"
38762"void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);\n"
38763"void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);\n"
38764"void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);\n"
38765"void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);\n"
38766"void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);\n"
38767"void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);\n"
38768"void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);\n"
38769"void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);\n"
38770"void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);\n"
38771"void __ovld vstore_half2(float2 data, size_t offset, __local half *p);\n"
38772"void __ovld vstore_half3(float3 data, size_t offset, __local half *p);\n"
38773"void __ovld vstore_half4(float4 data, size_t offset, __local half *p);\n"
38774"void __ovld vstore_half8(float8 data, size_t offset, __local half *p);\n"
38775"void __ovld vstore_half16(float16 data, size_t offset, __local half *p);\n"
38776"void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);\n"
38777"void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);\n"
38778"void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);\n"
38779"void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);\n"
38780"void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);\n"
38781"void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);\n"
38782"void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);\n"
38783"void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);\n"
38784"void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);\n"
38785"void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);\n"
38786"void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);\n"
38787"void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);\n"
38788"void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);\n"
38789"void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);\n"
38790"void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);\n"
38791"void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);\n"
38792"void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);\n"
38793"void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);\n"
38794"void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);\n"
38795"void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);\n"
38796"void __ovld vstore_half2(float2 data, size_t offset, __private half *p);\n"
38797"void __ovld vstore_half3(float3 data, size_t offset, __private half *p);\n"
38798"void __ovld vstore_half4(float4 data, size_t offset, __private half *p);\n"
38799"void __ovld vstore_half8(float8 data, size_t offset, __private half *p);\n"
38800"void __ovld vstore_half16(float16 data, size_t offset, __private half *p);\n"
38801"void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);\n"
38802"void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);\n"
38803"void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);\n"
38804"void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);\n"
38805"void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);\n"
38806"void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);\n"
38807"void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);\n"
38808"void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);\n"
38809"void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);\n"
38810"void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);\n"
38811"void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);\n"
38812"void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);\n"
38813"void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);\n"
38814"void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);\n"
38815"void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);\n"
38816"void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);\n"
38817"void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);\n"
38818"void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);\n"
38819"void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);\n"
38820"void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);\n"
38821"#ifdef cl_khr_fp64\n"
38822"void __ovld vstore_half2(double2 data, size_t offset, __global half *p);\n"
38823"void __ovld vstore_half3(double3 data, size_t offset, __global half *p);\n"
38824"void __ovld vstore_half4(double4 data, size_t offset, __global half *p);\n"
38825"void __ovld vstore_half8(double8 data, size_t offset, __global half *p);\n"
38826"void __ovld vstore_half16(double16 data, size_t offset, __global half *p);\n"
38827"void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);\n"
38828"void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);\n"
38829"void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);\n"
38830"void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);\n"
38831"void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);\n"
38832"void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);\n"
38833"void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);\n"
38834"void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);\n"
38835"void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);\n"
38836"void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);\n"
38837"void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);\n"
38838"void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);\n"
38839"void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);\n"
38840"void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);\n"
38841"void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);\n"
38842"void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);\n"
38843"void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);\n"
38844"void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);\n"
38845"void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);\n"
38846"void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);\n"
38847"void __ovld vstore_half2(double2 data, size_t offset, __local half *p);\n"
38848"void __ovld vstore_half3(double3 data, size_t offset, __local half *p);\n"
38849"void __ovld vstore_half4(double4 data, size_t offset, __local half *p);\n"
38850"void __ovld vstore_half8(double8 data, size_t offset, __local half *p);\n"
38851"void __ovld vstore_half16(double16 data, size_t offset, __local half *p);\n"
38852"void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);\n"
38853"void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);\n"
38854"void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);\n"
38855"void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);\n"
38856"void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);\n"
38857"void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);\n"
38858"void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);\n"
38859"void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);\n"
38860"void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);\n"
38861"void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);\n"
38862"void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);\n"
38863"void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);\n"
38864"void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);\n"
38865"void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);\n"
38866"void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);\n"
38867"void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);\n"
38868"void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);\n"
38869"void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);\n"
38870"void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);\n"
38871"void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);\n"
38872"void __ovld vstore_half2(double2 data, size_t offset, __private half *p);\n"
38873"void __ovld vstore_half3(double3 data, size_t offset, __private half *p);\n"
38874"void __ovld vstore_half4(double4 data, size_t offset, __private half *p);\n"
38875"void __ovld vstore_half8(double8 data, size_t offset, __private half *p);\n"
38876"void __ovld vstore_half16(double16 data, size_t offset, __private half *p);\n"
38877"void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);\n"
38878"void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);\n"
38879"void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);\n"
38880"void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);\n"
38881"void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);\n"
38882"void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);\n"
38883"void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);\n"
38884"void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);\n"
38885"void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);\n"
38886"void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);\n"
38887"void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);\n"
38888"void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);\n"
38889"void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);\n"
38890"void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);\n"
38891"void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);\n"
38892"void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);\n"
38893"void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);\n"
38894"void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);\n"
38895"void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);\n"
38896"void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);\n"
38897"#endif //cl_khr_fp64\n"
38898"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38899"\n"
38900"/**\n"
38901" * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)\n"
38902" * bytes of data from address (p + (offset * n)).\n"
38903" * The data read is interpreted as a halfn value.\n"
38904" * The halfn value read is converted to a floatn\n"
38905" * value and the floatn value is returned.\n"
38906" * The address computed as (p + (offset * n))\n"
38907" * must be aligned to sizeof (halfn) bytes.\n"
38908" * For n = 3, vloada_half3 reads a half3 from\n"
38909" * address (p + (offset * 4)) and returns a float3.\n"
38910" * The address computed as (p + (offset * 4))\n"
38911" * must be aligned to sizeof (half) * 4 bytes.\n"
38912" */\n"
38913"float __ovld vloada_half(size_t offset, const __constant half *p);\n"
38914"float2 __ovld vloada_half2(size_t offset, const __constant half *p);\n"
38915"float3 __ovld vloada_half3(size_t offset, const __constant half *p);\n"
38916"float4 __ovld vloada_half4(size_t offset, const __constant half *p);\n"
38917"float8 __ovld vloada_half8(size_t offset, const __constant half *p);\n"
38918"float16 __ovld vloada_half16(size_t offset, const __constant half *p);\n"
38919"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38920"float __ovld vloada_half(size_t offset, const half *p);\n"
38921"float2 __ovld vloada_half2(size_t offset, const half *p);\n"
38922"float3 __ovld vloada_half3(size_t offset, const half *p);\n"
38923"float4 __ovld vloada_half4(size_t offset, const half *p);\n"
38924"float8 __ovld vloada_half8(size_t offset, const half *p);\n"
38925"float16 __ovld vloada_half16(size_t offset, const half *p);\n"
38926"#else\n"
38927"float __ovld vloada_half(size_t offset, const __global half *p);\n"
38928"float2 __ovld vloada_half2(size_t offset, const __global half *p);\n"
38929"float3 __ovld vloada_half3(size_t offset, const __global half *p);\n"
38930"float4 __ovld vloada_half4(size_t offset, const __global half *p);\n"
38931"float8 __ovld vloada_half8(size_t offset, const __global half *p);\n"
38932"float16 __ovld vloada_half16(size_t offset, const __global half *p);\n"
38933"float __ovld vloada_half(size_t offset, const __local half *p);\n"
38934"float2 __ovld vloada_half2(size_t offset, const __local half *p);\n"
38935"float3 __ovld vloada_half3(size_t offset, const __local half *p);\n"
38936"float4 __ovld vloada_half4(size_t offset, const __local half *p);\n"
38937"float8 __ovld vloada_half8(size_t offset, const __local half *p);\n"
38938"float16 __ovld vloada_half16(size_t offset, const __local half *p);\n"
38939"float __ovld vloada_half(size_t offset, const __private half *p);\n"
38940"float2 __ovld vloada_half2(size_t offset, const __private half *p);\n"
38941"float3 __ovld vloada_half3(size_t offset, const __private half *p);\n"
38942"float4 __ovld vloada_half4(size_t offset, const __private half *p);\n"
38943"float8 __ovld vloada_half8(size_t offset, const __private half *p);\n"
38944"float16 __ovld vloada_half16(size_t offset, const __private half *p);\n"
38945"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38946"\n"
38947"/**\n"
38948" * The floatn value given by data is converted to\n"
38949" * a halfn value using the appropriate rounding\n"
38950" * mode.\n"
38951" * For n = 1, 2, 4, 8 and 16, the halfn value is\n"
38952" * written to the address computed as (p + (offset\n"
38953" * * n)). The address computed as (p + (offset *\n"
38954" * n)) must be aligned to sizeof (halfn) bytes.\n"
38955" * For n = 3, the half3 value is written to the\n"
38956" * address computed as (p + (offset * 4)). The\n"
38957" * address computed as (p + (offset * 4)) must be\n"
38958" * aligned to sizeof (half) * 4 bytes.\n"
38959" * vstorea_halfn uses the current rounding\n"
38960" * mode. The default current rounding mode is\n"
38961" * round to nearest even.\n"
38962" */\n"
38963"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
38964"void __ovld vstorea_half(float data, size_t offset, half *p);\n"
38965"void __ovld vstorea_half2(float2 data, size_t offset, half *p);\n"
38966"void __ovld vstorea_half3(float3 data, size_t offset, half *p);\n"
38967"void __ovld vstorea_half4(float4 data, size_t offset, half *p);\n"
38968"void __ovld vstorea_half8(float8 data, size_t offset, half *p);\n"
38969"void __ovld vstorea_half16(float16 data, size_t offset, half *p);\n"
38970"\n"
38971"void __ovld vstorea_half_rte(float data, size_t offset, half *p);\n"
38972"void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);\n"
38973"void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);\n"
38974"void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);\n"
38975"void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);\n"
38976"void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);\n"
38977"\n"
38978"void __ovld vstorea_half_rtz(float data, size_t offset, half *p);\n"
38979"void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);\n"
38980"void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);\n"
38981"void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);\n"
38982"void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);\n"
38983"void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);\n"
38984"\n"
38985"void __ovld vstorea_half_rtp(float data, size_t offset, half *p);\n"
38986"void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);\n"
38987"void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);\n"
38988"void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);\n"
38989"void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);\n"
38990"void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);\n"
38991"\n"
38992"void __ovld vstorea_half_rtn(float data, size_t offset, half *p);\n"
38993"void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);\n"
38994"void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);\n"
38995"void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);\n"
38996"void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);\n"
38997"void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);\n"
38998"\n"
38999"#ifdef cl_khr_fp64\n"
39000"void __ovld vstorea_half(double data, size_t offset, half *p);\n"
39001"void __ovld vstorea_half2(double2 data, size_t offset, half *p);\n"
39002"void __ovld vstorea_half3(double3 data, size_t offset, half *p);\n"
39003"void __ovld vstorea_half4(double4 data, size_t offset, half *p);\n"
39004"void __ovld vstorea_half8(double8 data, size_t offset, half *p);\n"
39005"void __ovld vstorea_half16(double16 data, size_t offset, half *p);\n"
39006"\n"
39007"void __ovld vstorea_half_rte(double data, size_t offset, half *p);\n"
39008"void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);\n"
39009"void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);\n"
39010"void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);\n"
39011"void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);\n"
39012"void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);\n"
39013"\n"
39014"void __ovld vstorea_half_rtz(double data, size_t offset, half *p);\n"
39015"void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);\n"
39016"void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);\n"
39017"void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);\n"
39018"void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);\n"
39019"void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);\n"
39020"\n"
39021"void __ovld vstorea_half_rtp(double data, size_t offset, half *p);\n"
39022"void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);\n"
39023"void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);\n"
39024"void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);\n"
39025"void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);\n"
39026"void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);\n"
39027"\n"
39028"void __ovld vstorea_half_rtn(double data, size_t offset, half *p);\n"
39029"void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);\n"
39030"void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);\n"
39031"void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);\n"
39032"void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);\n"
39033"void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);\n"
39034"#endif //cl_khr_fp64\n"
39035"\n"
39036"#else\n"
39037"void __ovld vstorea_half(float data, size_t offset, __global half *p);\n"
39038"void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);\n"
39039"void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);\n"
39040"void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);\n"
39041"void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);\n"
39042"void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);\n"
39043"\n"
39044"void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);\n"
39045"void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);\n"
39046"void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);\n"
39047"void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);\n"
39048"void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);\n"
39049"void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);\n"
39050"\n"
39051"void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);\n"
39052"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);\n"
39053"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);\n"
39054"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);\n"
39055"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);\n"
39056"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);\n"
39057"\n"
39058"void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);\n"
39059"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);\n"
39060"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);\n"
39061"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);\n"
39062"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);\n"
39063"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);\n"
39064"\n"
39065"void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);\n"
39066"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);\n"
39067"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);\n"
39068"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);\n"
39069"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);\n"
39070"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);\n"
39071"\n"
39072"void __ovld vstorea_half(float data, size_t offset, __local half *p);\n"
39073"void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);\n"
39074"void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);\n"
39075"void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);\n"
39076"void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);\n"
39077"void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);\n"
39078"\n"
39079"void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);\n"
39080"void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);\n"
39081"void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);\n"
39082"void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);\n"
39083"void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);\n"
39084"void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);\n"
39085"\n"
39086"void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);\n"
39087"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);\n"
39088"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);\n"
39089"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);\n"
39090"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);\n"
39091"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);\n"
39092"\n"
39093"void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);\n"
39094"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);\n"
39095"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);\n"
39096"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);\n"
39097"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);\n"
39098"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);\n"
39099"\n"
39100"void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);\n"
39101"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);\n"
39102"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);\n"
39103"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);\n"
39104"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);\n"
39105"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);\n"
39106"\n"
39107"void __ovld vstorea_half(float data, size_t offset, __private half *p);\n"
39108"void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);\n"
39109"void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);\n"
39110"void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);\n"
39111"void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);\n"
39112"void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);\n"
39113"\n"
39114"void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);\n"
39115"void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);\n"
39116"void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);\n"
39117"void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);\n"
39118"void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);\n"
39119"void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);\n"
39120"\n"
39121"void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);\n"
39122"void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);\n"
39123"void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);\n"
39124"void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);\n"
39125"void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);\n"
39126"void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);\n"
39127"\n"
39128"void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);\n"
39129"void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);\n"
39130"void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);\n"
39131"void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);\n"
39132"void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);\n"
39133"void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);\n"
39134"\n"
39135"void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);\n"
39136"void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);\n"
39137"void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);\n"
39138"void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);\n"
39139"void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);\n"
39140"void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);\n"
39141"\n"
39142"#ifdef cl_khr_fp64\n"
39143"void __ovld vstorea_half(double data, size_t offset, __global half *p);\n"
39144"void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);\n"
39145"void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);\n"
39146"void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);\n"
39147"void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);\n"
39148"void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);\n"
39149"\n"
39150"void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);\n"
39151"void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);\n"
39152"void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);\n"
39153"void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);\n"
39154"void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);\n"
39155"void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);\n"
39156"\n"
39157"void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);\n"
39158"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);\n"
39159"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);\n"
39160"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);\n"
39161"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);\n"
39162"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);\n"
39163"\n"
39164"void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);\n"
39165"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);\n"
39166"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);\n"
39167"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);\n"
39168"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);\n"
39169"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);\n"
39170"\n"
39171"void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);\n"
39172"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);\n"
39173"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);\n"
39174"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);\n"
39175"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);\n"
39176"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);\n"
39177"\n"
39178"void __ovld vstorea_half(double data, size_t offset, __local half *p);\n"
39179"void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);\n"
39180"void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);\n"
39181"void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);\n"
39182"void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);\n"
39183"void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);\n"
39184"\n"
39185"void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);\n"
39186"void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);\n"
39187"void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);\n"
39188"void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);\n"
39189"void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);\n"
39190"void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);\n"
39191"\n"
39192"void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);\n"
39193"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);\n"
39194"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);\n"
39195"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);\n"
39196"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);\n"
39197"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);\n"
39198"\n"
39199"void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);\n"
39200"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);\n"
39201"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);\n"
39202"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);\n"
39203"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);\n"
39204"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);\n"
39205"\n"
39206"void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);\n"
39207"void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);\n"
39208"void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);\n"
39209"void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);\n"
39210"void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);\n"
39211"void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);\n"
39212"\n"
39213"void __ovld vstorea_half(double data, size_t offset, __private half *p);\n"
39214"void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);\n"
39215"void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);\n"
39216"void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);\n"
39217"void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);\n"
39218"void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);\n"
39219"\n"
39220"void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);\n"
39221"void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);\n"
39222"void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);\n"
39223"void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);\n"
39224"void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);\n"
39225"void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);\n"
39226"\n"
39227"void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);\n"
39228"void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);\n"
39229"void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);\n"
39230"void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);\n"
39231"void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);\n"
39232"void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);\n"
39233"\n"
39234"void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);\n"
39235"void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);\n"
39236"void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);\n"
39237"void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);\n"
39238"void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);\n"
39239"void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);\n"
39240"\n"
39241"void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);\n"
39242"void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);\n"
39243"void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);\n"
39244"void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);\n"
39245"void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);\n"
39246"void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);\n"
39247"#endif //cl_khr_fp64\n"
39248"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39249"\n"
39250"// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions\n"
39251"\n"
39252"// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence\n"
39253"typedef uint cl_mem_fence_flags;\n"
39254"\n"
39255"/**\n"
39256" * Queue a memory fence to ensure correct\n"
39257" * ordering of memory operations to local memory\n"
39258" */\n"
39259"#define CLK_LOCAL_MEM_FENCE 0x01\n"
39260"\n"
39261"/**\n"
39262" * Queue a memory fence to ensure correct\n"
39263" * ordering of memory operations to global memory\n"
39264" */\n"
39265"#define CLK_GLOBAL_MEM_FENCE 0x02\n"
39266"\n"
39267"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39268"/**\n"
39269" * Queue a memory fence to ensure correct ordering of memory\n"
39270" * operations between work-items of a work-group to\n"
39271" * image memory.\n"
39272" */\n"
39273"#define CLK_IMAGE_MEM_FENCE 0x04\n"
39274"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39275"\n"
39276"/**\n"
39277" * All work-items in a work-group executing the kernel\n"
39278" * on a processor must execute this function before any\n"
39279" * are allowed to continue execution beyond the barrier.\n"
39280" * This function must be encountered by all work-items in\n"
39281" * a work-group executing the kernel.\n"
39282" * If barrier is inside a conditional statement, then all\n"
39283" * work-items must enter the conditional if any work-item\n"
39284" * enters the conditional statement and executes the\n"
39285" * barrier.\n"
39286" * If barrer is inside a loop, all work-items must execute\n"
39287" * the barrier for each iteration of the loop before any are\n"
39288" * allowed to continue execution beyond the barrier.\n"
39289" * The barrier function also queues a memory fence\n"
39290" * (reads and writes) to ensure correct ordering of\n"
39291" * memory operations to local or global memory.\n"
39292" * The flags argument specifies the memory address space\n"
39293" * and can be set to a combination of the following literal\n"
39294" * values.\n"
39295" * CLK_LOCAL_MEM_FENCE - The barrier function\n"
39296" * will either flush any variables stored in local memory\n"
39297" * or queue a memory fence to ensure correct ordering of\n"
39298" * memory operations to local memory.\n"
39299" * CLK_GLOBAL_MEM_FENCE - The barrier function\n"
39300" * will queue a memory fence to ensure correct ordering\n"
39301" * of memory operations to global memory. This can be\n"
39302" * useful when work-items, for example, write to buffer or\n"
39303" * image objects and then want to read the updated data.\n"
39304" */\n"
39305"\n"
39306"void __ovld __conv barrier(cl_mem_fence_flags flags);\n"
39307"\n"
39308"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39309"\n"
39310"typedef enum memory_scope {\n"
39311" memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,\n"
39312" memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,\n"
39313" memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,\n"
39314" memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,\n"
39315"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
39316" memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP\n"
39317"#endif\n"
39318"} memory_scope;\n"
39319"\n"
39320"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
39321"void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);\n"
39322"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39323"\n"
39324"// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions\n"
39325"\n"
39326"/**\n"
39327" * Orders loads and stores of a work-item\n"
39328" * executing a kernel. This means that loads\n"
39329" * and stores preceding the mem_fence will\n"
39330" * be committed to memory before any loads\n"
39331" * and stores following the mem_fence.\n"
39332" * The flags argument specifies the memory\n"
39333" * address space and can be set to a\n"
39334" * combination of the following literal\n"
39335" * values:\n"
39336" * CLK_LOCAL_MEM_FENCE\n"
39337" * CLK_GLOBAL_MEM_FENCE.\n"
39338" */\n"
39339"void __ovld mem_fence(cl_mem_fence_flags flags);\n"
39340"\n"
39341"/**\n"
39342" * Read memory barrier that orders only\n"
39343" * loads.\n"
39344" * The flags argument specifies the memory\n"
39345" * address space and can be set to a\n"
39346" * combination of the following literal\n"
39347" * values:\n"
39348" * CLK_LOCAL_MEM_FENCE\n"
39349" * CLK_GLOBAL_MEM_FENCE.\n"
39350" */\n"
39351"void __ovld read_mem_fence(cl_mem_fence_flags flags);\n"
39352"\n"
39353"/**\n"
39354" * Write memory barrier that orders only\n"
39355" * stores.\n"
39356" * The flags argument specifies the memory\n"
39357" * address space and can be set to a\n"
39358" * combination of the following literal\n"
39359" * values:\n"
39360" * CLK_LOCAL_MEM_FENCE\n"
39361" * CLK_GLOBAL_MEM_FENCE.\n"
39362" */\n"
39363"void __ovld write_mem_fence(cl_mem_fence_flags flags);\n"
39364"\n"
39365"// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions\n"
39366"\n"
39367"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39368"cl_mem_fence_flags __ovld get_fence(const void *ptr);\n"
39369"cl_mem_fence_flags __ovld get_fence(void *ptr);\n"
39370"\n"
39371"/**\n"
39372" * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions\n"
39373" * and checked in Sema since they should be declared as\n"
39374" * addr gentype* to_addr (gentype*);\n"
39375" * where gentype is builtin type or user defined type.\n"
39376" */\n"
39377"\n"
39378"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
39379"\n"
39380"// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch\n"
39381"\n"
39382"/**\n"
39383" * event_t async_work_group_copy (\n"
39384" * __global gentype *dst,\n"
39385" * const __local gentype *src,\n"
39386" * size_t num_elements,\n"
39387" * event_t event)\n"
39388" * Perform an async copy of num_elements\n"
39389" * gentype elements from src to dst. The async\n"
39390" * copy is performed by all work-items in a workgroup\n"
39391" * and this built-in function must therefore\n"
39392" * be encountered by all work-items in a workgroup\n"
39393" * executing the kernel with the same\n"
39394" * argument values; otherwise the results are\n"
39395" * undefined.\n"
39396" * Returns an event object that can be used by\n"
39397" * wait_group_events to wait for the async copy\n"
39398" * to finish. The event argument can also be used\n"
39399" * to associate the async_work_group_copy with\n"
39400" * a previous async copy allowing an event to be\n"
39401" * shared by multiple async copies; otherwise event\n"
39402" * should be zero.\n"
39403" * If event argument is non-zero, the event object\n"
39404" * supplied in event argument will be returned.\n"
39405" * This function does not perform any implicit\n"
39406" * synchronization of source data such as using a\n"
39407" * barrier before performing the copy.\n"
39408" */\n"
39409"event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);\n"
39410"event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);\n"
39411"event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);\n"
39412"event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);\n"
39413"event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);\n"
39414"event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);\n"
39415"event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);\n"
39416"event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);\n"
39417"event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);\n"
39418"event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);\n"
39419"event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);\n"
39420"event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);\n"
39421"event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);\n"
39422"event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);\n"
39423"event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);\n"
39424"event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);\n"
39425"event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);\n"
39426"event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);\n"
39427"event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);\n"
39428"event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);\n"
39429"event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);\n"
39430"event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);\n"
39431"event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);\n"
39432"event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);\n"
39433"event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);\n"
39434"event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);\n"
39435"event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);\n"
39436"event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);\n"
39437"event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);\n"
39438"event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);\n"
39439"event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);\n"
39440"event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);\n"
39441"event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);\n"
39442"event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);\n"
39443"event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);\n"
39444"event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);\n"
39445"event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);\n"
39446"event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);\n"
39447"event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);\n"
39448"event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);\n"
39449"event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);\n"
39450"event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);\n"
39451"event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);\n"
39452"event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);\n"
39453"event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);\n"
39454"event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);\n"
39455"event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);\n"
39456"event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);\n"
39457"event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);\n"
39458"event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);\n"
39459"event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);\n"
39460"event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);\n"
39461"event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);\n"
39462"event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);\n"
39463"event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);\n"
39464"event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);\n"
39465"event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);\n"
39466"event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);\n"
39467"event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);\n"
39468"event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);\n"
39469"event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);\n"
39470"event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);\n"
39471"event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);\n"
39472"event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);\n"
39473"event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);\n"
39474"event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);\n"
39475"event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);\n"
39476"event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);\n"
39477"event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);\n"
39478"event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);\n"
39479"event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);\n"
39480"event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);\n"
39481"event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);\n"
39482"event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);\n"
39483"event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);\n"
39484"event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);\n"
39485"event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);\n"
39486"event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);\n"
39487"event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);\n"
39488"event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);\n"
39489"event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);\n"
39490"event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);\n"
39491"event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);\n"
39492"event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);\n"
39493"event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);\n"
39494"event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);\n"
39495"event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);\n"
39496"event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);\n"
39497"event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);\n"
39498"event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);\n"
39499"event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);\n"
39500"event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);\n"
39501"event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);\n"
39502"event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);\n"
39503"event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);\n"
39504"event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);\n"
39505"event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);\n"
39506"event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);\n"
39507"event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);\n"
39508"event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);\n"
39509"event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);\n"
39510"event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);\n"
39511"event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);\n"
39512"event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);\n"
39513"event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);\n"
39514"event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);\n"
39515"event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);\n"
39516"event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);\n"
39517"#ifdef cl_khr_fp64\n"
39518"event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);\n"
39519"event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);\n"
39520"event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);\n"
39521"event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);\n"
39522"event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);\n"
39523"event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);\n"
39524"event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);\n"
39525"event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);\n"
39526"event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);\n"
39527"event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);\n"
39528"event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);\n"
39529"event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);\n"
39530"#endif //cl_khr_fp64\n"
39531"#ifdef cl_khr_fp16\n"
39532"event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);\n"
39533"event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);\n"
39534"event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);\n"
39535"event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);\n"
39536"event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);\n"
39537"event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);\n"
39538"event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);\n"
39539"event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);\n"
39540"event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);\n"
39541"event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);\n"
39542"event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);\n"
39543"event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);\n"
39544"#endif //cl_khr_fp16\n"
39545"\n"
39546"/**\n"
39547" * Perform an async gather of num_elements\n"
39548" * gentype elements from src to dst. The\n"
39549" * src_stride is the stride in elements for each\n"
39550" * gentype element read from src. The dst_stride\n"
39551" * is the stride in elements for each gentype\n"
39552" * element written to dst. The async gather is\n"
39553" * performed by all work-items in a work-group.\n"
39554" * This built-in function must therefore be\n"
39555" * encountered by all work-items in a work-group\n"
39556" * executing the kernel with the same argument\n"
39557" * values; otherwise the results are undefined.\n"
39558" * Returns an event object that can be used by\n"
39559" * wait_group_events to wait for the async copy\n"
39560" * to finish. The event argument can also be used\n"
39561" * to associate the\n"
39562" * async_work_group_strided_copy with a\n"
39563" * previous async copy allowing an event to be\n"
39564" * shared by multiple async copies; otherwise event\n"
39565" * should be zero.\n"
39566" * If event argument is non-zero, the event object\n"
39567" * supplied in event argument will be returned.\n"
39568" * This function does not perform any implicit\n"
39569" * synchronization of source data such as using a\n"
39570" * barrier before performing the copy.\n"
39571" */\n"
39572"event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);\n"
39573"event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);\n"
39574"event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);\n"
39575"event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);\n"
39576"event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);\n"
39577"event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);\n"
39578"event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);\n"
39579"event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);\n"
39580"event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);\n"
39581"event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39582"event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39583"event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39584"event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39585"event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39586"event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39587"event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39588"event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39589"event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39590"event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39591"event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39592"event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39593"event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39594"event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39595"event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39596"event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39597"event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39598"event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39599"event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39600"event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39601"event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39602"event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39603"event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39604"event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39605"event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39606"event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39607"event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39608"event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39609"event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39610"event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39611"event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39612"event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39613"event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39614"event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39615"event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39616"event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39617"event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39618"event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39619"event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39620"event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39621"event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39622"event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39623"event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39624"event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39625"event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39626"event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39627"event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39628"event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39629"event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39630"event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39631"event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39632"event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39633"event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39634"event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39635"event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39636"event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39637"event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39638"event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39639"event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39640"event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39641"event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39642"event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39643"event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39644"event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39645"event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39646"event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39647"event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39648"event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39649"event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39650"event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39651"event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39652"event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39653"event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39654"event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39655"event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39656"event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39657"event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39658"event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39659"event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39660"event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39661"event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39662"event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39663"event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39664"event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39665"event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39666"event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39667"event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39668"event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39669"event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39670"event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39671"event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39672"event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39673"event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39674"event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39675"event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39676"event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39677"event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39678"event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39679"event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39680"#ifdef cl_khr_fp64\n"
39681"event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);\n"
39682"event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39683"event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39684"event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39685"event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39686"event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39687"event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39688"event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39689"event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39690"event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39691"event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39692"event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39693"#endif //cl_khr_fp64\n"
39694"#ifdef cl_khr_fp16\n"
39695"event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);\n"
39696"event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39697"event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39698"event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39699"event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39700"event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);\n"
39701"event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39702"event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39703"event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39704"event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39705"event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39706"event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);\n"
39707"#endif //cl_khr_fp16\n"
39708"\n"
39709"/**\n"
39710" * Wait for events that identify the\n"
39711" * async_work_group_copy operations to\n"
39712" * complete. The event objects specified in\n"
39713" * event_list will be released after the wait is\n"
39714" * performed.\n"
39715" * This function must be encountered by all workitems\n"
39716" * in a work-group executing the kernel with\n"
39717" * the same num_events and event objects specified\n"
39718" * in event_list; otherwise the results are undefined.\n"
39719" */\n"
39720"void __ovld wait_group_events(int num_events, event_t *event_list);\n"
39721"\n"
39722"/**\n"
39723" * Prefetch num_elements * sizeof(gentype)\n"
39724" * bytes into the global cache. The prefetch\n"
39725" * instruction is applied to a work-item in a workgroup\n"
39726" * and does not affect the functional\n"
39727" * behavior of the kernel.\n"
39728" */\n"
39729"void __ovld prefetch(const __global char *p, size_t num_elements);\n"
39730"void __ovld prefetch(const __global uchar *p, size_t num_elements);\n"
39731"void __ovld prefetch(const __global short *p, size_t num_elements);\n"
39732"void __ovld prefetch(const __global ushort *p, size_t num_elements);\n"
39733"void __ovld prefetch(const __global int *p, size_t num_elements);\n"
39734"void __ovld prefetch(const __global uint *p, size_t num_elements);\n"
39735"void __ovld prefetch(const __global long *p, size_t num_elements);\n"
39736"void __ovld prefetch(const __global ulong *p, size_t num_elements);\n"
39737"void __ovld prefetch(const __global float *p, size_t num_elements);\n"
39738"void __ovld prefetch(const __global char2 *p, size_t num_elements);\n"
39739"void __ovld prefetch(const __global uchar2 *p, size_t num_elements);\n"
39740"void __ovld prefetch(const __global short2 *p, size_t num_elements);\n"
39741"void __ovld prefetch(const __global ushort2 *p, size_t num_elements);\n"
39742"void __ovld prefetch(const __global int2 *p, size_t num_elements);\n"
39743"void __ovld prefetch(const __global uint2 *p, size_t num_elements);\n"
39744"void __ovld prefetch(const __global long2 *p, size_t num_elements);\n"
39745"void __ovld prefetch(const __global ulong2 *p, size_t num_elements);\n"
39746"void __ovld prefetch(const __global float2 *p, size_t num_elements);\n"
39747"void __ovld prefetch(const __global char3 *p, size_t num_elements);\n"
39748"void __ovld prefetch(const __global uchar3 *p, size_t num_elements);\n"
39749"void __ovld prefetch(const __global short3 *p, size_t num_elements);\n"
39750"void __ovld prefetch(const __global ushort3 *p, size_t num_elements);\n"
39751"void __ovld prefetch(const __global int3 *p, size_t num_elements);\n"
39752"void __ovld prefetch(const __global uint3 *p, size_t num_elements);\n"
39753"void __ovld prefetch(const __global long3 *p, size_t num_elements);\n"
39754"void __ovld prefetch(const __global ulong3 *p, size_t num_elements);\n"
39755"void __ovld prefetch(const __global float3 *p, size_t num_elements);\n"
39756"void __ovld prefetch(const __global char4 *p, size_t num_elements);\n"
39757"void __ovld prefetch(const __global uchar4 *p, size_t num_elements);\n"
39758"void __ovld prefetch(const __global short4 *p, size_t num_elements);\n"
39759"void __ovld prefetch(const __global ushort4 *p, size_t num_elements);\n"
39760"void __ovld prefetch(const __global int4 *p, size_t num_elements);\n"
39761"void __ovld prefetch(const __global uint4 *p, size_t num_elements);\n"
39762"void __ovld prefetch(const __global long4 *p, size_t num_elements);\n"
39763"void __ovld prefetch(const __global ulong4 *p, size_t num_elements);\n"
39764"void __ovld prefetch(const __global float4 *p, size_t num_elements);\n"
39765"void __ovld prefetch(const __global char8 *p, size_t num_elements);\n"
39766"void __ovld prefetch(const __global uchar8 *p, size_t num_elements);\n"
39767"void __ovld prefetch(const __global short8 *p, size_t num_elements);\n"
39768"void __ovld prefetch(const __global ushort8 *p, size_t num_elements);\n"
39769"void __ovld prefetch(const __global int8 *p, size_t num_elements);\n"
39770"void __ovld prefetch(const __global uint8 *p, size_t num_elements);\n"
39771"void __ovld prefetch(const __global long8 *p, size_t num_elements);\n"
39772"void __ovld prefetch(const __global ulong8 *p, size_t num_elements);\n"
39773"void __ovld prefetch(const __global float8 *p, size_t num_elements);\n"
39774"void __ovld prefetch(const __global char16 *p, size_t num_elements);\n"
39775"void __ovld prefetch(const __global uchar16 *p, size_t num_elements);\n"
39776"void __ovld prefetch(const __global short16 *p, size_t num_elements);\n"
39777"void __ovld prefetch(const __global ushort16 *p, size_t num_elements);\n"
39778"void __ovld prefetch(const __global int16 *p, size_t num_elements);\n"
39779"void __ovld prefetch(const __global uint16 *p, size_t num_elements);\n"
39780"void __ovld prefetch(const __global long16 *p, size_t num_elements);\n"
39781"void __ovld prefetch(const __global ulong16 *p, size_t num_elements);\n"
39782"void __ovld prefetch(const __global float16 *p, size_t num_elements);\n"
39783"#ifdef cl_khr_fp64\n"
39784"void __ovld prefetch(const __global double *p, size_t num_elements);\n"
39785"void __ovld prefetch(const __global double2 *p, size_t num_elements);\n"
39786"void __ovld prefetch(const __global double3 *p, size_t num_elements);\n"
39787"void __ovld prefetch(const __global double4 *p, size_t num_elements);\n"
39788"void __ovld prefetch(const __global double8 *p, size_t num_elements);\n"
39789"void __ovld prefetch(const __global double16 *p, size_t num_elements);\n"
39790"#endif //cl_khr_fp64\n"
39791"#ifdef cl_khr_fp16\n"
39792"void __ovld prefetch(const __global half *p, size_t num_elements);\n"
39793"void __ovld prefetch(const __global half2 *p, size_t num_elements);\n"
39794"void __ovld prefetch(const __global half3 *p, size_t num_elements);\n"
39795"void __ovld prefetch(const __global half4 *p, size_t num_elements);\n"
39796"void __ovld prefetch(const __global half8 *p, size_t num_elements);\n"
39797"void __ovld prefetch(const __global half16 *p, size_t num_elements);\n"
39798"#endif // cl_khr_fp16\n"
39799"\n"
39800"// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions\n"
39801"\n"
39802"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
39803"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
39804"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
39805"#endif\n"
39806"/**\n"
39807" * Read the 32-bit value (referred to as old)\n"
39808" * stored at location pointed by p. Compute\n"
39809" * (old + val) and store result at location\n"
39810" * pointed by p. The function returns old.\n"
39811" */\n"
39812"int __ovld atomic_add(volatile __global int *p, int val);\n"
39813"unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);\n"
39814"int __ovld atomic_add(volatile __local int *p, int val);\n"
39815"unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);\n"
39816"\n"
39817"#if defined(cl_khr_global_int32_base_atomics)\n"
39818"int __ovld atom_add(volatile __global int *p, int val);\n"
39819"unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);\n"
39820"#endif\n"
39821"#if defined(cl_khr_local_int32_base_atomics)\n"
39822"int __ovld atom_add(volatile __local int *p, int val);\n"
39823"unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);\n"
39824"#endif\n"
39825"\n"
39826"#if defined(cl_khr_int64_base_atomics)\n"
39827"long __ovld atom_add(volatile __global long *p, long val);\n"
39828"unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);\n"
39829"long __ovld atom_add(volatile __local long *p, long val);\n"
39830"unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);\n"
39831"#endif\n"
39832"\n"
39833"/**\n"
39834" * Read the 32-bit value (referred to as old) stored at location pointed by p.\n"
39835" * Compute (old - val) and store result at location pointed by p. The function\n"
39836" * returns old.\n"
39837" */\n"
39838"int __ovld atomic_sub(volatile __global int *p, int val);\n"
39839"unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);\n"
39840"int __ovld atomic_sub(volatile __local int *p, int val);\n"
39841"unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);\n"
39842"\n"
39843"#if defined(cl_khr_global_int32_base_atomics)\n"
39844"int __ovld atom_sub(volatile __global int *p, int val);\n"
39845"unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);\n"
39846"#endif\n"
39847"#if defined(cl_khr_local_int32_base_atomics)\n"
39848"int __ovld atom_sub(volatile __local int *p, int val);\n"
39849"unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);\n"
39850"#endif\n"
39851"\n"
39852"#if defined(cl_khr_int64_base_atomics)\n"
39853"long __ovld atom_sub(volatile __global long *p, long val);\n"
39854"unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);\n"
39855"long __ovld atom_sub(volatile __local long *p, long val);\n"
39856"unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);\n"
39857"#endif\n"
39858"\n"
39859"/**\n"
39860" * Swaps the old value stored at location p\n"
39861" * with new value given by val. Returns old\n"
39862" * value.\n"
39863" */\n"
39864"int __ovld atomic_xchg(volatile __global int *p, int val);\n"
39865"unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);\n"
39866"int __ovld atomic_xchg(volatile __local int *p, int val);\n"
39867"unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);\n"
39868"float __ovld atomic_xchg(volatile __global float *p, float val);\n"
39869"float __ovld atomic_xchg(volatile __local float *p, float val);\n"
39870"\n"
39871"#if defined(cl_khr_global_int32_base_atomics)\n"
39872"int __ovld atom_xchg(volatile __global int *p, int val);\n"
39873"unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);\n"
39874"#endif\n"
39875"#if defined(cl_khr_local_int32_base_atomics)\n"
39876"int __ovld atom_xchg(volatile __local int *p, int val);\n"
39877"unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);\n"
39878"#endif\n"
39879"\n"
39880"#if defined(cl_khr_int64_base_atomics)\n"
39881"long __ovld atom_xchg(volatile __global long *p, long val);\n"
39882"long __ovld atom_xchg(volatile __local long *p, long val);\n"
39883"unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);\n"
39884"unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);\n"
39885"#endif\n"
39886"\n"
39887"/**\n"
39888" * Read the 32-bit value (referred to as old)\n"
39889" * stored at location pointed by p. Compute\n"
39890" * (old + 1) and store result at location\n"
39891" * pointed by p. The function returns old.\n"
39892" */\n"
39893"int __ovld atomic_inc(volatile __global int *p);\n"
39894"unsigned int __ovld atomic_inc(volatile __global unsigned int *p);\n"
39895"int __ovld atomic_inc(volatile __local int *p);\n"
39896"unsigned int __ovld atomic_inc(volatile __local unsigned int *p);\n"
39897"\n"
39898"#if defined(cl_khr_global_int32_base_atomics)\n"
39899"int __ovld atom_inc(volatile __global int *p);\n"
39900"unsigned int __ovld atom_inc(volatile __global unsigned int *p);\n"
39901"#endif\n"
39902"#if defined(cl_khr_local_int32_base_atomics)\n"
39903"int __ovld atom_inc(volatile __local int *p);\n"
39904"unsigned int __ovld atom_inc(volatile __local unsigned int *p);\n"
39905"#endif\n"
39906"\n"
39907"#if defined(cl_khr_int64_base_atomics)\n"
39908"long __ovld atom_inc(volatile __global long *p);\n"
39909"unsigned long __ovld atom_inc(volatile __global unsigned long *p);\n"
39910"long __ovld atom_inc(volatile __local long *p);\n"
39911"unsigned long __ovld atom_inc(volatile __local unsigned long *p);\n"
39912"#endif\n"
39913"\n"
39914"/**\n"
39915" * Read the 32-bit value (referred to as old)\n"
39916" * stored at location pointed by p. Compute\n"
39917" * (old - 1) and store result at location\n"
39918" * pointed by p. The function returns old.\n"
39919" */\n"
39920"int __ovld atomic_dec(volatile __global int *p);\n"
39921"unsigned int __ovld atomic_dec(volatile __global unsigned int *p);\n"
39922"int __ovld atomic_dec(volatile __local int *p);\n"
39923"unsigned int __ovld atomic_dec(volatile __local unsigned int *p);\n"
39924"\n"
39925"#if defined(cl_khr_global_int32_base_atomics)\n"
39926"int __ovld atom_dec(volatile __global int *p);\n"
39927"unsigned int __ovld atom_dec(volatile __global unsigned int *p);\n"
39928"#endif\n"
39929"#if defined(cl_khr_local_int32_base_atomics)\n"
39930"int __ovld atom_dec(volatile __local int *p);\n"
39931"unsigned int __ovld atom_dec(volatile __local unsigned int *p);\n"
39932"#endif\n"
39933"\n"
39934"#if defined(cl_khr_int64_base_atomics)\n"
39935"long __ovld atom_dec(volatile __global long *p);\n"
39936"unsigned long __ovld atom_dec(volatile __global unsigned long *p);\n"
39937"long __ovld atom_dec(volatile __local long *p);\n"
39938"unsigned long __ovld atom_dec(volatile __local unsigned long *p);\n"
39939"#endif\n"
39940"\n"
39941"/**\n"
39942" * Read the 32-bit value (referred to as old)\n"
39943" * stored at location pointed by p. Compute\n"
39944" * (old == cmp) ? val : old and store result at\n"
39945" * location pointed by p. The function\n"
39946" * returns old.\n"
39947" */\n"
39948"int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);\n"
39949"unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
39950"int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);\n"
39951"unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
39952"\n"
39953"#if defined(cl_khr_global_int32_base_atomics)\n"
39954"int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);\n"
39955"unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n"
39956"#endif\n"
39957"#if defined(cl_khr_local_int32_base_atomics)\n"
39958"int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);\n"
39959"unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n"
39960"#endif\n"
39961"\n"
39962"#if defined(cl_khr_int64_base_atomics)\n"
39963"long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);\n"
39964"unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);\n"
39965"long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);\n"
39966"unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);\n"
39967"#endif\n"
39968"\n"
39969"/**\n"
39970" * Read the 32-bit value (referred to as old)\n"
39971" * stored at location pointed by p. Compute\n"
39972" * min(old, val) and store minimum value at\n"
39973" * location pointed by p. The function\n"
39974" * returns old.\n"
39975" */\n"
39976"int __ovld atomic_min(volatile __global int *p, int val);\n"
39977"unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);\n"
39978"int __ovld atomic_min(volatile __local int *p, int val);\n"
39979"unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);\n"
39980"\n"
39981"#if defined(cl_khr_global_int32_extended_atomics)\n"
39982"int __ovld atom_min(volatile __global int *p, int val);\n"
39983"unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);\n"
39984"#endif\n"
39985"#if defined(cl_khr_local_int32_extended_atomics)\n"
39986"int __ovld atom_min(volatile __local int *p, int val);\n"
39987"unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);\n"
39988"#endif\n"
39989"\n"
39990"#if defined(cl_khr_int64_extended_atomics)\n"
39991"long __ovld atom_min(volatile __global long *p, long val);\n"
39992"unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);\n"
39993"long __ovld atom_min(volatile __local long *p, long val);\n"
39994"unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);\n"
39995"#endif\n"
39996"\n"
39997"/**\n"
39998" * Read the 32-bit value (referred to as old)\n"
39999" * stored at location pointed by p. Compute\n"
40000" * max(old, val) and store maximum value at\n"
40001" * location pointed by p. The function\n"
40002" * returns old.\n"
40003" */\n"
40004"int __ovld atomic_max(volatile __global int *p, int val);\n"
40005"unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);\n"
40006"int __ovld atomic_max(volatile __local int *p, int val);\n"
40007"unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);\n"
40008"\n"
40009"#if defined(cl_khr_global_int32_extended_atomics)\n"
40010"int __ovld atom_max(volatile __global int *p, int val);\n"
40011"unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);\n"
40012"#endif\n"
40013"#if defined(cl_khr_local_int32_extended_atomics)\n"
40014"int __ovld atom_max(volatile __local int *p, int val);\n"
40015"unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);\n"
40016"#endif\n"
40017"\n"
40018"#if defined(cl_khr_int64_extended_atomics)\n"
40019"long __ovld atom_max(volatile __global long *p, long val);\n"
40020"unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);\n"
40021"long __ovld atom_max(volatile __local long *p, long val);\n"
40022"unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);\n"
40023"#endif\n"
40024"\n"
40025"/**\n"
40026" * Read the 32-bit value (referred to as old)\n"
40027" * stored at location pointed by p. Compute\n"
40028" * (old & val) and store result at location\n"
40029" * pointed by p. The function returns old.\n"
40030" */\n"
40031"int __ovld atomic_and(volatile __global int *p, int val);\n"
40032"unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);\n"
40033"int __ovld atomic_and(volatile __local int *p, int val);\n"
40034"unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);\n"
40035"\n"
40036"#if defined(cl_khr_global_int32_extended_atomics)\n"
40037"int __ovld atom_and(volatile __global int *p, int val);\n"
40038"unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);\n"
40039"#endif\n"
40040"#if defined(cl_khr_local_int32_extended_atomics)\n"
40041"int __ovld atom_and(volatile __local int *p, int val);\n"
40042"unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);\n"
40043"#endif\n"
40044"\n"
40045"#if defined(cl_khr_int64_extended_atomics)\n"
40046"long __ovld atom_and(volatile __global long *p, long val);\n"
40047"unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);\n"
40048"long __ovld atom_and(volatile __local long *p, long val);\n"
40049"unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);\n"
40050"#endif\n"
40051"\n"
40052"/**\n"
40053" * Read the 32-bit value (referred to as old)\n"
40054" * stored at location pointed by p. Compute\n"
40055" * (old | val) and store result at location\n"
40056" * pointed by p. The function returns old.\n"
40057" */\n"
40058"int __ovld atomic_or(volatile __global int *p, int val);\n"
40059"unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);\n"
40060"int __ovld atomic_or(volatile __local int *p, int val);\n"
40061"unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);\n"
40062"\n"
40063"#if defined(cl_khr_global_int32_extended_atomics)\n"
40064"int __ovld atom_or(volatile __global int *p, int val);\n"
40065"unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);\n"
40066"#endif\n"
40067"#if defined(cl_khr_local_int32_extended_atomics)\n"
40068"int __ovld atom_or(volatile __local int *p, int val);\n"
40069"unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);\n"
40070"#endif\n"
40071"\n"
40072"#if defined(cl_khr_int64_extended_atomics)\n"
40073"long __ovld atom_or(volatile __global long *p, long val);\n"
40074"unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);\n"
40075"long __ovld atom_or(volatile __local long *p, long val);\n"
40076"unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);\n"
40077"#endif\n"
40078"\n"
40079"/**\n"
40080" * Read the 32-bit value (referred to as old)\n"
40081" * stored at location pointed by p. Compute\n"
40082" * (old ^ val) and store result at location\n"
40083" * pointed by p. The function returns old.\n"
40084" */\n"
40085"int __ovld atomic_xor(volatile __global int *p, int val);\n"
40086"unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);\n"
40087"int __ovld atomic_xor(volatile __local int *p, int val);\n"
40088"unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);\n"
40089"\n"
40090"#if defined(cl_khr_global_int32_extended_atomics)\n"
40091"int __ovld atom_xor(volatile __global int *p, int val);\n"
40092"unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);\n"
40093"#endif\n"
40094"#if defined(cl_khr_local_int32_extended_atomics)\n"
40095"int __ovld atom_xor(volatile __local int *p, int val);\n"
40096"unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);\n"
40097"#endif\n"
40098"\n"
40099"#if defined(cl_khr_int64_extended_atomics)\n"
40100"long __ovld atom_xor(volatile __global long *p, long val);\n"
40101"unsigned long __ovld atom_xor(volatile __global unsigned long *p, unsigned long val);\n"
40102"long __ovld atom_xor(volatile __local long *p, long val);\n"
40103"unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long val);\n"
40104"#endif\n"
40105"\n"
40106"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40107"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable\n"
40108"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable\n"
40109"#endif\n"
40110"\n"
40111"// OpenCL v2.0 s6.13.11 - Atomics Functions\n"
40112"\n"
40113"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40114"#ifndef ATOMIC_VAR_INIT\n"
40115"#define ATOMIC_VAR_INIT(x) (x)\n"
40116"#endif //ATOMIC_VAR_INIT\n"
40117"#define ATOMIC_FLAG_INIT 0\n"
40118"\n"
40119"// enum values aligned with what clang uses in EmitAtomicExpr()\n"
40120"typedef enum memory_order\n"
40121"{\n"
40122" memory_order_relaxed = __ATOMIC_RELAXED,\n"
40123" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
40124" memory_order_release = __ATOMIC_RELEASE,\n"
40125" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
40126" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
40127"} memory_order;\n"
40128"\n"
40129"// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics\n"
40130"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40131"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
40132"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
40133"#endif\n"
40134"\n"
40135"// atomic_init()\n"
40136"void __ovld atomic_init(volatile atomic_int *object, int value);\n"
40137"void __ovld atomic_init(volatile atomic_uint *object, uint value);\n"
40138"void __ovld atomic_init(volatile atomic_float *object, float value);\n"
40139"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40140"void __ovld atomic_init(volatile atomic_long *object, long value);\n"
40141"void __ovld atomic_init(volatile atomic_ulong *object, ulong value);\n"
40142"#ifdef cl_khr_fp64\n"
40143"void __ovld atomic_init(volatile atomic_double *object, double value);\n"
40144"#endif //cl_khr_fp64\n"
40145"#endif\n"
40146"\n"
40147"// atomic_work_item_fence()\n"
40148"void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);\n"
40149"\n"
40150"// atomic_fetch()\n"
40151"\n"
40152"int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);\n"
40153"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40154"int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40155"uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);\n"
40156"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40157"uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40158"int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);\n"
40159"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40160"int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40161"uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);\n"
40162"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40163"uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40164"int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);\n"
40165"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40166"int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40167"uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);\n"
40168"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40169"uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40170"int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);\n"
40171"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40172"int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40173"uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);\n"
40174"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40175"uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40176"int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);\n"
40177"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40178"int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40179"uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);\n"
40180"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40181"uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40182"int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);\n"
40183"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40184"int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40185"uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);\n"
40186"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40187"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40188"uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);\n"
40189"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
40190"uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
40191"int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);\n"
40192"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);\n"
40193"int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n"
40194"uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);\n"
40195"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n"
40196"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n"
40197"uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);\n"
40198"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);\n"
40199"uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n"
40200"\n"
40201"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40202"long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);\n"
40203"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40204"long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40205"ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);\n"
40206"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40207"ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40208"long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);\n"
40209"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40210"long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40211"ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);\n"
40212"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40213"ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40214"long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);\n"
40215"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40216"long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40217"ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);\n"
40218"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40219"ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40220"long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);\n"
40221"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40222"long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40223"ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);\n"
40224"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40225"ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40226"long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);\n"
40227"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40228"long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40229"ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);\n"
40230"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40231"ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40232"long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);\n"
40233"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40234"long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40235"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);\n"
40236"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40237"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40238"ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);\n"
40239"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
40240"ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
40241"long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);\n"
40242"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);\n"
40243"long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n"
40244"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);\n"
40245"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n"
40246"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n"
40247"ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);\n"
40248"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n"
40249"ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n"
40250"#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40251"\n"
40252"// OpenCL v2.0 s6.13.11.7.5:\n"
40253"// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.\n"
40254"// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.\n"
40255"\n"
40256"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40257"uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
40258"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
40259"uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
40260"uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n"
40261"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n"
40262"uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n"
40263"\n"
40264"uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);\n"
40265"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
40266"uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
40267"uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);\n"
40268"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
40269"uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
40270"uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);\n"
40271"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n"
40272"uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n"
40273"uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
40274"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
40275"uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
40276"uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);\n"
40277"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n"
40278"uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n"
40279"\n"
40280"intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);\n"
40281"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
40282"intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
40283"intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);\n"
40284"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
40285"intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
40286"intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);\n"
40287"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n"
40288"intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n"
40289"intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
40290"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
40291"intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
40292"intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);\n"
40293"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n"
40294"intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n"
40295"#endif\n"
40296"\n"
40297"// atomic_store()\n"
40298"\n"
40299"void __ovld atomic_store(volatile atomic_int *object, int desired);\n"
40300"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
40301"void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
40302"void __ovld atomic_store(volatile atomic_uint *object, uint desired);\n"
40303"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
40304"void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
40305"void __ovld atomic_store(volatile atomic_float *object, float desired);\n"
40306"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
40307"void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
40308"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40309"#ifdef cl_khr_fp64\n"
40310"void __ovld atomic_store(volatile atomic_double *object, double desired);\n"
40311"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
40312"void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
40313"#endif //cl_khr_fp64\n"
40314"void __ovld atomic_store(volatile atomic_long *object, long desired);\n"
40315"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
40316"void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
40317"void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);\n"
40318"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
40319"void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
40320"#endif\n"
40321"\n"
40322"// atomic_load()\n"
40323"\n"
40324"int __ovld atomic_load(volatile atomic_int *object);\n"
40325"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);\n"
40326"int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);\n"
40327"uint __ovld atomic_load(volatile atomic_uint *object);\n"
40328"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);\n"
40329"uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);\n"
40330"float __ovld atomic_load(volatile atomic_float *object);\n"
40331"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);\n"
40332"float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);\n"
40333"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40334"#ifdef cl_khr_fp64\n"
40335"double __ovld atomic_load(volatile atomic_double *object);\n"
40336"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);\n"
40337"double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);\n"
40338"#endif //cl_khr_fp64\n"
40339"long __ovld atomic_load(volatile atomic_long *object);\n"
40340"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);\n"
40341"long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);\n"
40342"ulong __ovld atomic_load(volatile atomic_ulong *object);\n"
40343"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);\n"
40344"ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);\n"
40345"#endif\n"
40346"\n"
40347"// atomic_exchange()\n"
40348"\n"
40349"int __ovld atomic_exchange(volatile atomic_int *object, int desired);\n"
40350"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);\n"
40351"int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n"
40352"uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);\n"
40353"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n"
40354"uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n"
40355"float __ovld atomic_exchange(volatile atomic_float *object, float desired);\n"
40356"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);\n"
40357"float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n"
40358"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40359"#ifdef cl_khr_fp64\n"
40360"double __ovld atomic_exchange(volatile atomic_double *object, double desired);\n"
40361"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);\n"
40362"double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n"
40363"#endif //cl_khr_fp64\n"
40364"long __ovld atomic_exchange(volatile atomic_long *object, long desired);\n"
40365"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);\n"
40366"long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n"
40367"ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);\n"
40368"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n"
40369"ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n"
40370"#endif\n"
40371"\n"
40372"// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()\n"
40373"\n"
40374"bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);\n"
40375"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
40376" int desired, memory_order success, memory_order failure);\n"
40377"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n"
40378" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
40379"bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);\n"
40380"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
40381" uint desired, memory_order success, memory_order failure);\n"
40382"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n"
40383" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
40384"bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);\n"
40385"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
40386" int desired, memory_order success, memory_order failure);\n"
40387"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n"
40388" int desired, memory_order success, memory_order failure, memory_scope scope);\n"
40389"bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);\n"
40390"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
40391" uint desired, memory_order success, memory_order failure);\n"
40392"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n"
40393" uint desired, memory_order success, memory_order failure, memory_scope scope);\n"
40394"bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);\n"
40395"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
40396" float desired, memory_order success, memory_order failure);\n"
40397"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n"
40398" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
40399"bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);\n"
40400"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
40401" float desired, memory_order success, memory_order failure);\n"
40402"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n"
40403" float desired, memory_order success, memory_order failure, memory_scope scope);\n"
40404"#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n"
40405"#ifdef cl_khr_fp64\n"
40406"bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);\n"
40407"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
40408" double desired, memory_order success, memory_order failure);\n"
40409"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n"
40410" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
40411"bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);\n"
40412"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
40413" double desired, memory_order success, memory_order failure);\n"
40414"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n"
40415" double desired, memory_order success, memory_order failure, memory_scope scope);\n"
40416"#endif //cl_khr_fp64\n"
40417"bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);\n"
40418"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
40419" long desired, memory_order success, memory_order failure);\n"
40420"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n"
40421" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
40422"bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);\n"
40423"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
40424" long desired, memory_order success, memory_order failure);\n"
40425"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n"
40426" long desired, memory_order success, memory_order failure, memory_scope scope);\n"
40427"bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
40428"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
40429" ulong desired, memory_order success, memory_order failure);\n"
40430"bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n"
40431" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
40432"bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);\n"
40433"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
40434" ulong desired, memory_order success, memory_order failure);\n"
40435"bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n"
40436" ulong desired, memory_order success, memory_order failure, memory_scope scope);\n"
40437"#endif\n"
40438"\n"
40439"// atomic_flag_test_and_set() and atomic_flag_clear()\n"
40440"\n"
40441"bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);\n"
40442"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);\n"
40443"bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
40444"void __ovld atomic_flag_clear(volatile atomic_flag *object);\n"
40445"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);\n"
40446"void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n"
40447"\n"
40448"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
40449"\n"
40450"// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions\n"
40451"\n"
40452"/**\n"
40453" * The shuffle and shuffle2 built-in functions construct\n"
40454" * a permutation of elements from one or two input\n"
40455" * vectors respectively that are of the same type,\n"
40456" * returning a vector with the same element type as the\n"
40457" * input and length that is the same as the shuffle mask.\n"
40458" * The size of each element in the mask must match the\n"
40459" * size of each element in the result. For shuffle, only\n"
40460" * the ilogb(2m-1) least significant bits of each mask\n"
40461" * element are considered. For shuffle2, only the\n"
40462" * ilogb(2m-1)+1 least significant bits of each mask\n"
40463" * element are considered. Other bits in the mask shall\n"
40464" * be ignored.\n"
40465" * The elements of the input vectors are numbered from\n"
40466" * left to right across one or both of the vectors. For this\n"
40467" * purpose, the number of elements in a vector is given\n"
40468" * by vec_step(gentypem). The shuffle mask operand\n"
40469" * specifies, for each element of the result vector, which\n"
40470" * element of the one or two input vectors the result\n"
40471" * element gets.\n"
40472" * Examples:\n"
40473" * uint4 mask = (uint4)(3, 2,\n"
40474" * 1, 0);\n"
40475" * float4 a;\n"
40476" * float4 r = shuffle(a, mask);\n"
40477" * // r.s0123 = a.wzyx\n"
40478" * uint8 mask = (uint8)(0, 1, 2, 3,\n"
40479" * 4, 5, 6, 7);\n"
40480" * float4 a, b;\n"
40481" * float8 r = shuffle2(a, b, mask);\n"
40482" * // r.s0123 = a.xyzw\n"
40483" * // r.s4567 = b.xyzw\n"
40484" * uint4 mask;\n"
40485" * float8 a;\n"
40486" * float4 b;\n"
40487" * b = shuffle(a, mask);\n"
40488" * Examples that are not valid are:\n"
40489" * uint8 mask;\n"
40490" * short16 a;\n"
40491" * short8 b;\n"
40492" * b = shuffle(a, mask); <- not valid\n"
40493" */\n"
40494"char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);\n"
40495"char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);\n"
40496"char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);\n"
40497"char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);\n"
40498"\n"
40499"uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);\n"
40500"uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);\n"
40501"uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);\n"
40502"uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);\n"
40503"\n"
40504"short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);\n"
40505"short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);\n"
40506"short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);\n"
40507"short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);\n"
40508"\n"
40509"ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);\n"
40510"ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);\n"
40511"ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);\n"
40512"ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);\n"
40513"\n"
40514"int2 __ovld __cnfn shuffle(int2 x, uint2 mask);\n"
40515"int2 __ovld __cnfn shuffle(int4 x, uint2 mask);\n"
40516"int2 __ovld __cnfn shuffle(int8 x, uint2 mask);\n"
40517"int2 __ovld __cnfn shuffle(int16 x, uint2 mask);\n"
40518"\n"
40519"uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);\n"
40520"uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);\n"
40521"uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);\n"
40522"uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);\n"
40523"\n"
40524"long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);\n"
40525"long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);\n"
40526"long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);\n"
40527"long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);\n"
40528"\n"
40529"ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);\n"
40530"ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);\n"
40531"ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);\n"
40532"ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);\n"
40533"\n"
40534"float2 __ovld __cnfn shuffle(float2 x, uint2 mask);\n"
40535"float2 __ovld __cnfn shuffle(float4 x, uint2 mask);\n"
40536"float2 __ovld __cnfn shuffle(float8 x, uint2 mask);\n"
40537"float2 __ovld __cnfn shuffle(float16 x, uint2 mask);\n"
40538"\n"
40539"char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);\n"
40540"char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);\n"
40541"char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);\n"
40542"char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);\n"
40543"\n"
40544"uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);\n"
40545"uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);\n"
40546"uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);\n"
40547"uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);\n"
40548"\n"
40549"short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);\n"
40550"short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);\n"
40551"short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);\n"
40552"short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);\n"
40553"\n"
40554"ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);\n"
40555"ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);\n"
40556"ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);\n"
40557"ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);\n"
40558"\n"
40559"int4 __ovld __cnfn shuffle(int2 x, uint4 mask);\n"
40560"int4 __ovld __cnfn shuffle(int4 x, uint4 mask);\n"
40561"int4 __ovld __cnfn shuffle(int8 x, uint4 mask);\n"
40562"int4 __ovld __cnfn shuffle(int16 x, uint4 mask);\n"
40563"\n"
40564"uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);\n"
40565"uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);\n"
40566"uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);\n"
40567"uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);\n"
40568"\n"
40569"long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);\n"
40570"long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);\n"
40571"long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);\n"
40572"long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);\n"
40573"\n"
40574"ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);\n"
40575"ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);\n"
40576"ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);\n"
40577"ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);\n"
40578"\n"
40579"float4 __ovld __cnfn shuffle(float2 x, uint4 mask);\n"
40580"float4 __ovld __cnfn shuffle(float4 x, uint4 mask);\n"
40581"float4 __ovld __cnfn shuffle(float8 x, uint4 mask);\n"
40582"float4 __ovld __cnfn shuffle(float16 x, uint4 mask);\n"
40583"\n"
40584"char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);\n"
40585"char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);\n"
40586"char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);\n"
40587"char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);\n"
40588"\n"
40589"uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);\n"
40590"uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);\n"
40591"uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);\n"
40592"uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);\n"
40593"\n"
40594"short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);\n"
40595"short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);\n"
40596"short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);\n"
40597"short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);\n"
40598"\n"
40599"ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);\n"
40600"ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);\n"
40601"ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);\n"
40602"ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);\n"
40603"\n"
40604"int8 __ovld __cnfn shuffle(int2 x, uint8 mask);\n"
40605"int8 __ovld __cnfn shuffle(int4 x, uint8 mask);\n"
40606"int8 __ovld __cnfn shuffle(int8 x, uint8 mask);\n"
40607"int8 __ovld __cnfn shuffle(int16 x, uint8 mask);\n"
40608"\n"
40609"uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);\n"
40610"uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);\n"
40611"uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);\n"
40612"uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);\n"
40613"\n"
40614"long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);\n"
40615"long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);\n"
40616"long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);\n"
40617"long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);\n"
40618"\n"
40619"ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);\n"
40620"ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);\n"
40621"ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);\n"
40622"ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);\n"
40623"\n"
40624"float8 __ovld __cnfn shuffle(float2 x, uint8 mask);\n"
40625"float8 __ovld __cnfn shuffle(float4 x, uint8 mask);\n"
40626"float8 __ovld __cnfn shuffle(float8 x, uint8 mask);\n"
40627"float8 __ovld __cnfn shuffle(float16 x, uint8 mask);\n"
40628"\n"
40629"char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);\n"
40630"char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);\n"
40631"char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);\n"
40632"char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);\n"
40633"\n"
40634"uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);\n"
40635"uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);\n"
40636"uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);\n"
40637"uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);\n"
40638"\n"
40639"short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);\n"
40640"short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);\n"
40641"short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);\n"
40642"short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);\n"
40643"\n"
40644"ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);\n"
40645"ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);\n"
40646"ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);\n"
40647"ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);\n"
40648"\n"
40649"int16 __ovld __cnfn shuffle(int2 x, uint16 mask);\n"
40650"int16 __ovld __cnfn shuffle(int4 x, uint16 mask);\n"
40651"int16 __ovld __cnfn shuffle(int8 x, uint16 mask);\n"
40652"int16 __ovld __cnfn shuffle(int16 x, uint16 mask);\n"
40653"\n"
40654"uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);\n"
40655"uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);\n"
40656"uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);\n"
40657"uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);\n"
40658"\n"
40659"long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);\n"
40660"long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);\n"
40661"long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);\n"
40662"long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);\n"
40663"\n"
40664"ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);\n"
40665"ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);\n"
40666"ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);\n"
40667"ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);\n"
40668"\n"
40669"float16 __ovld __cnfn shuffle(float2 x, uint16 mask);\n"
40670"float16 __ovld __cnfn shuffle(float4 x, uint16 mask);\n"
40671"float16 __ovld __cnfn shuffle(float8 x, uint16 mask);\n"
40672"float16 __ovld __cnfn shuffle(float16 x, uint16 mask);\n"
40673"\n"
40674"#ifdef cl_khr_fp64\n"
40675"double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);\n"
40676"double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);\n"
40677"double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);\n"
40678"double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);\n"
40679"\n"
40680"double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);\n"
40681"double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);\n"
40682"double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);\n"
40683"double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);\n"
40684"\n"
40685"double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);\n"
40686"double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);\n"
40687"double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);\n"
40688"double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);\n"
40689"\n"
40690"double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);\n"
40691"double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);\n"
40692"double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);\n"
40693"double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);\n"
40694"#endif //cl_khr_fp64\n"
40695"\n"
40696"#ifdef cl_khr_fp16\n"
40697"half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);\n"
40698"half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);\n"
40699"half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);\n"
40700"half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);\n"
40701"\n"
40702"half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);\n"
40703"half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);\n"
40704"half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);\n"
40705"half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);\n"
40706"\n"
40707"half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);\n"
40708"half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);\n"
40709"half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);\n"
40710"half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);\n"
40711"\n"
40712"half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);\n"
40713"half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);\n"
40714"half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);\n"
40715"half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);\n"
40716"#endif //cl_khr_fp16\n"
40717"\n"
40718"char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);\n"
40719"char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);\n"
40720"char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);\n"
40721"char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);\n"
40722"\n"
40723"uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);\n"
40724"uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);\n"
40725"uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);\n"
40726"uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);\n"
40727"\n"
40728"short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);\n"
40729"short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);\n"
40730"short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);\n"
40731"short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);\n"
40732"\n"
40733"ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);\n"
40734"ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);\n"
40735"ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);\n"
40736"ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);\n"
40737"\n"
40738"int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);\n"
40739"int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);\n"
40740"int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);\n"
40741"int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);\n"
40742"\n"
40743"uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);\n"
40744"uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);\n"
40745"uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);\n"
40746"uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);\n"
40747"\n"
40748"long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);\n"
40749"long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);\n"
40750"long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);\n"
40751"long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);\n"
40752"\n"
40753"ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);\n"
40754"ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);\n"
40755"ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);\n"
40756"ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);\n"
40757"\n"
40758"float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);\n"
40759"float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);\n"
40760"float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);\n"
40761"float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);\n"
40762"\n"
40763"char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);\n"
40764"char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);\n"
40765"char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);\n"
40766"char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);\n"
40767"\n"
40768"uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);\n"
40769"uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);\n"
40770"uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);\n"
40771"uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);\n"
40772"\n"
40773"short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);\n"
40774"short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);\n"
40775"short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);\n"
40776"short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);\n"
40777"\n"
40778"ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);\n"
40779"ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);\n"
40780"ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);\n"
40781"ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);\n"
40782"\n"
40783"int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);\n"
40784"int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);\n"
40785"int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);\n"
40786"int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);\n"
40787"\n"
40788"uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);\n"
40789"uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);\n"
40790"uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);\n"
40791"uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);\n"
40792"\n"
40793"long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);\n"
40794"long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);\n"
40795"long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);\n"
40796"long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);\n"
40797"\n"
40798"ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);\n"
40799"ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);\n"
40800"ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);\n"
40801"ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);\n"
40802"\n"
40803"float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);\n"
40804"float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);\n"
40805"float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);\n"
40806"float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);\n"
40807"\n"
40808"char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);\n"
40809"char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);\n"
40810"char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);\n"
40811"char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);\n"
40812"\n"
40813"uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);\n"
40814"uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);\n"
40815"uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);\n"
40816"uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);\n"
40817"\n"
40818"short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);\n"
40819"short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);\n"
40820"short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);\n"
40821"short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);\n"
40822"\n"
40823"ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);\n"
40824"ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);\n"
40825"ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);\n"
40826"ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);\n"
40827"\n"
40828"int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);\n"
40829"int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);\n"
40830"int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);\n"
40831"int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);\n"
40832"\n"
40833"uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);\n"
40834"uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);\n"
40835"uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);\n"
40836"uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);\n"
40837"\n"
40838"long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);\n"
40839"long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);\n"
40840"long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);\n"
40841"long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);\n"
40842"\n"
40843"ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);\n"
40844"ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);\n"
40845"ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);\n"
40846"ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);\n"
40847"\n"
40848"float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);\n"
40849"float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);\n"
40850"float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);\n"
40851"float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);\n"
40852"\n"
40853"char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);\n"
40854"char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);\n"
40855"char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);\n"
40856"char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);\n"
40857"\n"
40858"uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);\n"
40859"uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);\n"
40860"uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);\n"
40861"uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);\n"
40862"\n"
40863"short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);\n"
40864"short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);\n"
40865"short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);\n"
40866"short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);\n"
40867"\n"
40868"ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);\n"
40869"ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);\n"
40870"ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);\n"
40871"ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);\n"
40872"\n"
40873"int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);\n"
40874"int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);\n"
40875"int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);\n"
40876"int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);\n"
40877"\n"
40878"uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);\n"
40879"uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);\n"
40880"uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);\n"
40881"uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);\n"
40882"\n"
40883"long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);\n"
40884"long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);\n"
40885"long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);\n"
40886"long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);\n"
40887"\n"
40888"ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);\n"
40889"ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);\n"
40890"ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);\n"
40891"ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);\n"
40892"\n"
40893"float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);\n"
40894"float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);\n"
40895"float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);\n"
40896"float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);\n"
40897"\n"
40898"#ifdef cl_khr_fp64\n"
40899"double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);\n"
40900"double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);\n"
40901"double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);\n"
40902"double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);\n"
40903"\n"
40904"double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);\n"
40905"double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);\n"
40906"double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);\n"
40907"double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);\n"
40908"\n"
40909"double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);\n"
40910"double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);\n"
40911"double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);\n"
40912"double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);\n"
40913"\n"
40914"double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);\n"
40915"double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);\n"
40916"double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);\n"
40917"double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);\n"
40918"#endif //cl_khr_fp64\n"
40919"\n"
40920"#ifdef cl_khr_fp16\n"
40921"half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);\n"
40922"half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);\n"
40923"half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);\n"
40924"half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);\n"
40925"\n"
40926"half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);\n"
40927"half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);\n"
40928"half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);\n"
40929"half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);\n"
40930"\n"
40931"half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);\n"
40932"half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);\n"
40933"half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);\n"
40934"half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);\n"
40935"\n"
40936"half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);\n"
40937"half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);\n"
40938"half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);\n"
40939"half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);\n"
40940"#endif //cl_khr_fp16\n"
40941"\n"
40942"#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n"
40943"// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf\n"
40944"\n"
40945"int printf(__constant const char* st, ...);\n"
40946"#endif\n"
40947"\n"
40948"// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions\n"
40949"\n"
40950"// These values need to match the runtime equivalent\n"
40951"//\n"
40952"// Addressing Mode.\n"
40953"//\n"
40954"#define CLK_ADDRESS_NONE 0\n"
40955"#define CLK_ADDRESS_CLAMP_TO_EDGE 2\n"
40956"#define CLK_ADDRESS_CLAMP 4\n"
40957"#define CLK_ADDRESS_REPEAT 6\n"
40958"#define CLK_ADDRESS_MIRRORED_REPEAT 8\n"
40959"\n"
40960"//\n"
40961"// Coordination Normalization\n"
40962"//\n"
40963"#define CLK_NORMALIZED_COORDS_FALSE 0\n"
40964"#define CLK_NORMALIZED_COORDS_TRUE 1\n"
40965"\n"
40966"//\n"
40967"// Filtering Mode.\n"
40968"//\n"
40969"#define CLK_FILTER_NEAREST 0x10\n"
40970"#define CLK_FILTER_LINEAR 0x20\n"
40971"\n"
40972"#ifdef cl_khr_gl_msaa_sharing\n"
40973"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
40974"#endif //cl_khr_gl_msaa_sharing\n"
40975"\n"
40976"/**\n"
40977" * Use the coordinate (coord.xy) to do an element lookup in\n"
40978" * the 2D image object specified by image.\n"
40979" *\n"
40980" * Use the coordinate (coord.x, coord.y, coord.z) to do\n"
40981" * an element lookup in the 3D image object specified\n"
40982" * by image. coord.w is ignored.\n"
40983" *\n"
40984" * Use the coordinate (coord.z) to index into the\n"
40985" * 2D image array object specified by image_array\n"
40986" * and (coord.x, coord.y) to do an element lookup in\n"
40987" * the 2D image object specified by image.\n"
40988" *\n"
40989" * Use the coordinate (x) to do an element lookup in\n"
40990" * the 1D image object specified by image.\n"
40991" *\n"
40992" * Use the coordinate (coord.y) to index into the\n"
40993" * 1D image array object specified by image_array\n"
40994" * and (coord.x) to do an element lookup in\n"
40995" * the 1D image object specified by image.\n"
40996" *\n"
40997" * Use the coordinate (cood.xy) and sample to do an\n"
40998" * element lookup in the 2D multi-sample image specified\n"
40999" * by image.\n"
41000" *\n"
41001" * Use coord.xy and sample to do an element\n"
41002" * lookup in the 2D multi-sample image layer\n"
41003" * identified by index coord.z in the 2D multi-sample\n"
41004" * image array specified by image.\n"
41005" *\n"
41006" * For mipmap images, use the mip-level specified by\n"
41007" * the Level-of-Detail (lod) or use gradients for LOD\n"
41008" * computation.\n"
41009" *\n"
41010" * read_imagef returns floating-point values in the\n"
41011" * range [0.0 ... 1.0] for image objects created with\n"
41012" * image_channel_data_type set to one of the predefined\n"
41013" * packed formats or CL_UNORM_INT8, or\n"
41014" * CL_UNORM_INT16.\n"
41015" *\n"
41016" * read_imagef returns floating-point values in the\n"
41017" * range [-1.0 ... 1.0] for image objects created with\n"
41018" * image_channel_data_type set to CL_SNORM_INT8,\n"
41019" * or CL_SNORM_INT16.\n"
41020" *\n"
41021" * read_imagef returns floating-point values for image\n"
41022" * objects created with image_channel_data_type set to\n"
41023" * CL_HALF_FLOAT or CL_FLOAT.\n"
41024" *\n"
41025" * read_imagei and read_imageui return\n"
41026" * unnormalized signed integer and unsigned integer\n"
41027" * values respectively. Each channel will be stored in a\n"
41028" * 32-bit integer.\n"
41029" *\n"
41030" * read_imagei can only be used with image objects\n"
41031" * created with image_channel_data_type set to one of\n"
41032" * the following values:\n"
41033" * CL_SIGNED_INT8,\n"
41034" * CL_SIGNED_INT16 and\n"
41035" * CL_SIGNED_INT32.\n"
41036" * If the image_channel_data_type is not one of the\n"
41037" * above values, the values returned by read_imagei\n"
41038" * are undefined.\n"
41039" *\n"
41040" * read_imageui can only be used with image objects\n"
41041" * created with image_channel_data_type set to one of\n"
41042" * the following values:\n"
41043" * CL_UNSIGNED_INT8,\n"
41044" * CL_UNSIGNED_INT16 and\n"
41045" * CL_UNSIGNED_INT32.\n"
41046" * If the image_channel_data_type is not one of the\n"
41047" * above values, the values returned by read_imageui\n"
41048" * are undefined.\n"
41049" *\n"
41050" * The read_image{i|ui} calls support a nearest filter\n"
41051" * only. The filter_mode specified in sampler\n"
41052" * must be set to CLK_FILTER_NEAREST; otherwise\n"
41053" * the values returned are undefined.\n"
41054"\n"
41055" * The read_image{f|i|ui} calls that take\n"
41056" * integer coordinates must use a sampler with\n"
41057" * normalized coordinates set to\n"
41058" * CLK_NORMALIZED_COORDS_FALSE and\n"
41059" * addressing mode set to\n"
41060" * CLK_ADDRESS_CLAMP_TO_EDGE,\n"
41061" * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;\n"
41062" * otherwise the values returned are undefined.\n"
41063" *\n"
41064" * Values returned by read_imagef for image objects\n"
41065" * with image_channel_data_type values not specified\n"
41066" * in the description above are undefined.\n"
41067" */\n"
41068"\n"
41069"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
41070"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
41071"\n"
41072"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
41073"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
41074"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
41075"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
41076"\n"
41077"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
41078"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
41079"\n"
41080"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
41081"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
41082"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
41083"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
41084"\n"
41085"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
41086"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
41087"\n"
41088"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
41089"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
41090"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n"
41091"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n"
41092"\n"
41093"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);\n"
41094"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);\n"
41095"\n"
41096"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);\n"
41097"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);\n"
41098"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);\n"
41099"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);\n"
41100"\n"
41101"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
41102"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
41103"\n"
41104"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
41105"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
41106"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n"
41107"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n"
41108"\n"
41109"#ifdef cl_khr_depth_images\n"
41110"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);\n"
41111"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);\n"
41112"\n"
41113"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);\n"
41114"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);\n"
41115"#endif //cl_khr_depth_images\n"
41116"\n"
41117"#if defined(cl_khr_gl_msaa_sharing)\n"
41118"float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);\n"
41119"int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);\n"
41120"uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);\n"
41121"\n"
41122"float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);\n"
41123"\n"
41124"float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
41125"int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
41126"uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);\n"
41127"\n"
41128"float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
41129"#endif //cl_khr_gl_msaa_sharing\n"
41130"\n"
41131"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
41132"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41133"#ifdef cl_khr_mipmap_image\n"
41134"\n"
41135"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41136"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41137"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41138"\n"
41139"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41140"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41141"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41142"\n"
41143"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41144"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41145"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41146"\n"
41147"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
41148"\n"
41149"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41150"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41151"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41152"\n"
41153"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
41154"\n"
41155"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41156"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41157"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41158"\n"
41159"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41160"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41161"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41162"\n"
41163"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41164"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41165"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41166"\n"
41167"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41168"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41169"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41170"\n"
41171"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41172"\n"
41173"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41174"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41175"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41176"\n"
41177"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41178"\n"
41179"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41180"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41181"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41182"\n"
41183"float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41184"int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41185"uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n"
41186"\n"
41187"float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41188"int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41189"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41190"\n"
41191"float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41192"int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41193"uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41194"\n"
41195"float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
41196"\n"
41197"float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41198"int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41199"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41200"\n"
41201"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
41202"\n"
41203"float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41204"int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41205"uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41206"\n"
41207"#endif //cl_khr_mipmap_image\n"
41208"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41209"\n"
41210"/**\n"
41211"* Sampler-less Image Access\n"
41212"*/\n"
41213"\n"
41214"float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);\n"
41215"int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);\n"
41216"uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);\n"
41217"\n"
41218"float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);\n"
41219"int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);\n"
41220"uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);\n"
41221"\n"
41222"float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);\n"
41223"int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);\n"
41224"uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);\n"
41225"\n"
41226"float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);\n"
41227"int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);\n"
41228"uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);\n"
41229"\n"
41230"float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);\n"
41231"int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);\n"
41232"uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);\n"
41233"\n"
41234"#ifdef cl_khr_depth_images\n"
41235"float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);\n"
41236"float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);\n"
41237"#endif //cl_khr_depth_images\n"
41238"\n"
41239"float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);\n"
41240"int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);\n"
41241"uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);\n"
41242"\n"
41243"// Image read functions returning half4 type\n"
41244"#ifdef cl_khr_fp16\n"
41245"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);\n"
41246"half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);\n"
41247"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);\n"
41248"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);\n"
41249"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);\n"
41250"half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);\n"
41251"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);\n"
41252"half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);\n"
41253"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);\n"
41254"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);\n"
41255"half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);\n"
41256"half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);\n"
41257"half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);\n"
41258"half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);\n"
41259"half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);\n"
41260"half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);\n"
41261"#endif //cl_khr_fp16\n"
41262"\n"
41263"// Image read functions for read_write images\n"
41264"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41265"float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);\n"
41266"int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);\n"
41267"uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);\n"
41268"\n"
41269"float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);\n"
41270"int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);\n"
41271"uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);\n"
41272"\n"
41273"float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);\n"
41274"int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);\n"
41275"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);\n"
41276"\n"
41277"float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);\n"
41278"int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);\n"
41279"uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);\n"
41280"\n"
41281"float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);\n"
41282"int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);\n"
41283"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);\n"
41284"\n"
41285"float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);\n"
41286"int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);\n"
41287"uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);\n"
41288"\n"
41289"#ifdef cl_khr_depth_images\n"
41290"float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);\n"
41291"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);\n"
41292"#endif //cl_khr_depth_images\n"
41293"\n"
41294"#if cl_khr_gl_msaa_sharing\n"
41295"float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);\n"
41296"int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);\n"
41297"uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);\n"
41298"\n"
41299"float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
41300"int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
41301"uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);\n"
41302"\n"
41303"float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);\n"
41304"float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);\n"
41305"#endif //cl_khr_gl_msaa_sharing\n"
41306"\n"
41307"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41308"#ifdef cl_khr_mipmap_image\n"
41309"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41310"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41311"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41312"\n"
41313"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41314"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41315"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41316"\n"
41317"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41318"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41319"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41320"\n"
41321"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
41322"\n"
41323"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41324"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41325"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41326"\n"
41327"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
41328"\n"
41329"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41330"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41331"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41332"\n"
41333"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41334"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41335"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n"
41336"\n"
41337"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41338"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41339"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n"
41340"\n"
41341"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41342"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41343"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41344"\n"
41345"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n"
41346"\n"
41347"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41348"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41349"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41350"\n"
41351"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n"
41352"\n"
41353"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41354"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41355"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n"
41356"\n"
41357"float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41358"int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41359"uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n"
41360"\n"
41361"float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41362"int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41363"uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n"
41364"\n"
41365"float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41366"int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41367"uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n"
41368"\n"
41369"float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n"
41370"\n"
41371"float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41372"int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41373"uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n"
41374"\n"
41375"float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n"
41376"\n"
41377"float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41378"int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41379"uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n"
41380"#endif //cl_khr_mipmap_image\n"
41381"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41382"\n"
41383"// Image read functions returning half4 type\n"
41384"#ifdef cl_khr_fp16\n"
41385"half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);\n"
41386"half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);\n"
41387"half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);\n"
41388"half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);\n"
41389"half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);\n"
41390"half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);\n"
41391"#endif //cl_khr_fp16\n"
41392"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41393"\n"
41394"/**\n"
41395" * Write color value to location specified by coordinate\n"
41396" * (coord.x, coord.y) in the 2D image object specified by image.\n"
41397" * (coord.x, coord.y) are considered to be unnormalized coordinates\n"
41398" * and must be in the range 0 ... image width - 1, and 0\n"
41399" * ... image height - 1.\n"
41400"\n"
41401" * Write color value to location specified by coordinate\n"
41402" * (coord.x, coord.y) in the 2D image object specified by index\n"
41403" * (coord.z) of the 2D image array object image_array.\n"
41404" * (coord.x, coord.y) are considered to be unnormalized\n"
41405" * coordinates and must be in the range 0 ... image width\n"
41406" * - 1.\n"
41407" *\n"
41408" * Write color value to location specified by coordinate\n"
41409" * (coord) in the 1D image (buffer) object specified by image.\n"
41410" * coord is considered to be unnormalized coordinates\n"
41411" * and must be in the range 0 ... image width - 1.\n"
41412" *\n"
41413" * Write color value to location specified by coordinate\n"
41414" * (coord.x) in the 1D image object specified by index\n"
41415" * (coord.y) of the 1D image array object image_array.\n"
41416" * x is considered to be unnormalized coordinates\n"
41417" * and must be in the range 0 ... image width - 1.\n"
41418" *\n"
41419" * Write color value to location specified by coordinate\n"
41420" * (coord.x, coord.y, coord.z) in the 3D image object specified by image.\n"
41421" * coord.x & coord.y are considered to be unnormalized coordinates\n"
41422" * and must be in the range 0 ... image width - 1, and 0\n"
41423" * ... image height - 1.\n"
41424" *\n"
41425" * For mipmap images, use mip-level specified by lod.\n"
41426" *\n"
41427" * Appropriate data format conversion to the specified\n"
41428" * image format is done before writing the color value.\n"
41429" *\n"
41430" * write_imagef can only be used with image objects\n"
41431" * created with image_channel_data_type set to one of\n"
41432" * the pre-defined packed formats or set to\n"
41433" * CL_SNORM_INT8, CL_UNORM_INT8,\n"
41434" * CL_SNORM_INT16, CL_UNORM_INT16,\n"
41435" * CL_HALF_FLOAT or CL_FLOAT. Appropriate data\n"
41436" * format conversion will be done to convert channel\n"
41437" * data from a floating-point value to actual data format\n"
41438" * in which the channels are stored.\n"
41439" *\n"
41440" * write_imagei can only be used with image objects\n"
41441" * created with image_channel_data_type set to one of\n"
41442" * the following values:\n"
41443" * CL_SIGNED_INT8,\n"
41444" * CL_SIGNED_INT16 and\n"
41445" * CL_SIGNED_INT32.\n"
41446" *\n"
41447" * write_imageui can only be used with image objects\n"
41448" * created with image_channel_data_type set to one of\n"
41449" * the following values:\n"
41450" * CL_UNSIGNED_INT8,\n"
41451" * CL_UNSIGNED_INT16 and\n"
41452" * CL_UNSIGNED_INT32.\n"
41453" *\n"
41454" * The behavior of write_imagef, write_imagei and\n"
41455" * write_imageui for image objects created with\n"
41456" * image_channel_data_type values not specified in\n"
41457" * the description above or with (x, y) coordinate\n"
41458" * values that are not in the range (0 ... image width -1,\n"
41459" * 0 ... image height - 1), respectively, is undefined.\n"
41460" */\n"
41461"void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);\n"
41462"void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);\n"
41463"void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);\n"
41464"\n"
41465"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);\n"
41466"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);\n"
41467"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);\n"
41468"\n"
41469"void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);\n"
41470"void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);\n"
41471"void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);\n"
41472"\n"
41473"void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);\n"
41474"void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);\n"
41475"void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);\n"
41476"\n"
41477"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);\n"
41478"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);\n"
41479"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);\n"
41480"\n"
41481"#ifdef cl_khr_3d_image_writes\n"
41482"void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);\n"
41483"void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);\n"
41484"void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);\n"
41485"#endif\n"
41486"\n"
41487"#ifdef cl_khr_depth_images\n"
41488"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);\n"
41489"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);\n"
41490"#endif //cl_khr_depth_images\n"
41491"\n"
41492"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
41493"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41494"#ifdef cl_khr_mipmap_image\n"
41495"void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);\n"
41496"void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);\n"
41497"void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);\n"
41498"\n"
41499"void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
41500"void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
41501"void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
41502"\n"
41503"void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);\n"
41504"void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);\n"
41505"void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);\n"
41506"\n"
41507"void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
41508"void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
41509"void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
41510"\n"
41511"void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);\n"
41512"void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);\n"
41513"\n"
41514"#ifdef cl_khr_3d_image_writes\n"
41515"void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);\n"
41516"void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);\n"
41517"void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);\n"
41518"#endif\n"
41519"#endif //cl_khr_mipmap_image\n"
41520"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41521"\n"
41522"// Image write functions for half4 type\n"
41523"#ifdef cl_khr_fp16\n"
41524"void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);\n"
41525"void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);\n"
41526"#ifdef cl_khr_3d_image_writes\n"
41527"void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);\n"
41528"#endif\n"
41529"void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);\n"
41530"void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);\n"
41531"void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);\n"
41532"#endif //cl_khr_fp16\n"
41533"\n"
41534"// Image write functions for read_write images\n"
41535"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41536"void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);\n"
41537"void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);\n"
41538"void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);\n"
41539"\n"
41540"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);\n"
41541"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);\n"
41542"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);\n"
41543"\n"
41544"void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);\n"
41545"void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);\n"
41546"void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);\n"
41547"\n"
41548"void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);\n"
41549"void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);\n"
41550"void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);\n"
41551"\n"
41552"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);\n"
41553"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);\n"
41554"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);\n"
41555"\n"
41556"#ifdef cl_khr_3d_image_writes\n"
41557"void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);\n"
41558"void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);\n"
41559"void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);\n"
41560"#endif\n"
41561"\n"
41562"#ifdef cl_khr_depth_images\n"
41563"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);\n"
41564"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);\n"
41565"#endif //cl_khr_depth_images\n"
41566"\n"
41567"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41568"#ifdef cl_khr_mipmap_image\n"
41569"void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);\n"
41570"void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);\n"
41571"void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);\n"
41572"\n"
41573"void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);\n"
41574"void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);\n"
41575"void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);\n"
41576"\n"
41577"void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);\n"
41578"void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);\n"
41579"void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);\n"
41580"\n"
41581"void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);\n"
41582"void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);\n"
41583"void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);\n"
41584"\n"
41585"void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);\n"
41586"void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);\n"
41587"\n"
41588"#ifdef cl_khr_3d_image_writes\n"
41589"void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);\n"
41590"void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);\n"
41591"void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);\n"
41592"#endif\n"
41593"#endif //cl_khr_mipmap_image\n"
41594"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41595"\n"
41596"// Image write functions for half4 type\n"
41597"#ifdef cl_khr_fp16\n"
41598"void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);\n"
41599"void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);\n"
41600"#ifdef cl_khr_3d_image_writes\n"
41601"void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);\n"
41602"#endif\n"
41603"void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);\n"
41604"void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);\n"
41605"void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);\n"
41606"#endif //cl_khr_fp16\n"
41607"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41608"\n"
41609"// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have\n"
41610"// access qualifier, which by default assume read_only access qualifier. Image query builtin\n"
41611"// functions with write_only image argument should also be declared.\n"
41612"\n"
41613"/**\n"
41614" * Return the image width in pixels.\n"
41615" *\n"
41616" */\n"
41617"int __ovld __cnfn get_image_width(read_only image1d_t image);\n"
41618"int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);\n"
41619"int __ovld __cnfn get_image_width(read_only image2d_t image);\n"
41620"#ifdef cl_khr_3d_image_writes\n"
41621"int __ovld __cnfn get_image_width(read_only image3d_t image);\n"
41622"#endif\n"
41623"int __ovld __cnfn get_image_width(read_only image1d_array_t image);\n"
41624"int __ovld __cnfn get_image_width(read_only image2d_array_t image);\n"
41625"#ifdef cl_khr_depth_images\n"
41626"int __ovld __cnfn get_image_width(read_only image2d_depth_t image);\n"
41627"int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);\n"
41628"#endif //cl_khr_depth_images\n"
41629"#if defined(cl_khr_gl_msaa_sharing)\n"
41630"int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);\n"
41631"int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);\n"
41632"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);\n"
41633"int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);\n"
41634"#endif //cl_khr_gl_msaa_sharing\n"
41635"\n"
41636"int __ovld __cnfn get_image_width(write_only image1d_t image);\n"
41637"int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);\n"
41638"int __ovld __cnfn get_image_width(write_only image2d_t image);\n"
41639"#ifdef cl_khr_3d_image_writes\n"
41640"int __ovld __cnfn get_image_width(write_only image3d_t image);\n"
41641"#endif\n"
41642"int __ovld __cnfn get_image_width(write_only image1d_array_t image);\n"
41643"int __ovld __cnfn get_image_width(write_only image2d_array_t image);\n"
41644"#ifdef cl_khr_depth_images\n"
41645"int __ovld __cnfn get_image_width(write_only image2d_depth_t image);\n"
41646"int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);\n"
41647"#endif //cl_khr_depth_images\n"
41648"#if defined(cl_khr_gl_msaa_sharing)\n"
41649"int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);\n"
41650"int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);\n"
41651"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);\n"
41652"int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);\n"
41653"#endif //cl_khr_gl_msaa_sharing\n"
41654"\n"
41655"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41656"int __ovld __cnfn get_image_width(read_write image1d_t image);\n"
41657"int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);\n"
41658"int __ovld __cnfn get_image_width(read_write image2d_t image);\n"
41659"int __ovld __cnfn get_image_width(read_write image3d_t image);\n"
41660"int __ovld __cnfn get_image_width(read_write image1d_array_t image);\n"
41661"int __ovld __cnfn get_image_width(read_write image2d_array_t image);\n"
41662"#ifdef cl_khr_depth_images\n"
41663"int __ovld __cnfn get_image_width(read_write image2d_depth_t image);\n"
41664"int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);\n"
41665"#endif //cl_khr_depth_images\n"
41666"#if defined(cl_khr_gl_msaa_sharing)\n"
41667"int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);\n"
41668"int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);\n"
41669"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);\n"
41670"int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);\n"
41671"#endif //cl_khr_gl_msaa_sharing\n"
41672"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41673"\n"
41674"/**\n"
41675" * Return the image height in pixels.\n"
41676" */\n"
41677"int __ovld __cnfn get_image_height(read_only image2d_t image);\n"
41678"int __ovld __cnfn get_image_height(read_only image3d_t image);\n"
41679"int __ovld __cnfn get_image_height(read_only image2d_array_t image);\n"
41680"#ifdef cl_khr_depth_images\n"
41681"int __ovld __cnfn get_image_height(read_only image2d_depth_t image);\n"
41682"int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);\n"
41683"#endif //cl_khr_depth_images\n"
41684"#if defined(cl_khr_gl_msaa_sharing)\n"
41685"int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);\n"
41686"int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);\n"
41687"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);\n"
41688"int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);\n"
41689"#endif //cl_khr_gl_msaa_sharing\n"
41690"\n"
41691"int __ovld __cnfn get_image_height(write_only image2d_t image);\n"
41692"#ifdef cl_khr_3d_image_writes\n"
41693"int __ovld __cnfn get_image_height(write_only image3d_t image);\n"
41694"#endif\n"
41695"int __ovld __cnfn get_image_height(write_only image2d_array_t image);\n"
41696"#ifdef cl_khr_depth_images\n"
41697"int __ovld __cnfn get_image_height(write_only image2d_depth_t image);\n"
41698"int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);\n"
41699"#endif //cl_khr_depth_images\n"
41700"#if defined(cl_khr_gl_msaa_sharing)\n"
41701"int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);\n"
41702"int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);\n"
41703"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);\n"
41704"int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);\n"
41705"#endif //cl_khr_gl_msaa_sharing\n"
41706"\n"
41707"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41708"int __ovld __cnfn get_image_height(read_write image2d_t image);\n"
41709"int __ovld __cnfn get_image_height(read_write image3d_t image);\n"
41710"int __ovld __cnfn get_image_height(read_write image2d_array_t image);\n"
41711"#ifdef cl_khr_depth_images\n"
41712"int __ovld __cnfn get_image_height(read_write image2d_depth_t image);\n"
41713"int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);\n"
41714"#endif //cl_khr_depth_images\n"
41715"#if defined(cl_khr_gl_msaa_sharing)\n"
41716"int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);\n"
41717"int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);\n"
41718"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);\n"
41719"int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);\n"
41720"#endif //cl_khr_gl_msaa_sharing\n"
41721"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41722"\n"
41723"/**\n"
41724" * Return the image depth in pixels.\n"
41725" */\n"
41726"int __ovld __cnfn get_image_depth(read_only image3d_t image);\n"
41727"\n"
41728"#ifdef cl_khr_3d_image_writes\n"
41729"int __ovld __cnfn get_image_depth(write_only image3d_t image);\n"
41730"#endif\n"
41731"\n"
41732"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41733"int __ovld __cnfn get_image_depth(read_write image3d_t image);\n"
41734"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41735"\n"
41736"// OpenCL Extension v2.0 s9.18 - Mipmaps\n"
41737"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41738"#ifdef cl_khr_mipmap_image\n"
41739"/**\n"
41740" * Return the image miplevels.\n"
41741" */\n"
41742"\n"
41743"int __ovld get_image_num_mip_levels(read_only image1d_t image);\n"
41744"int __ovld get_image_num_mip_levels(read_only image2d_t image);\n"
41745"int __ovld get_image_num_mip_levels(read_only image3d_t image);\n"
41746"\n"
41747"int __ovld get_image_num_mip_levels(write_only image1d_t image);\n"
41748"int __ovld get_image_num_mip_levels(write_only image2d_t image);\n"
41749"#ifdef cl_khr_3d_image_writes\n"
41750"int __ovld get_image_num_mip_levels(write_only image3d_t image);\n"
41751"#endif\n"
41752"\n"
41753"int __ovld get_image_num_mip_levels(read_write image1d_t image);\n"
41754"int __ovld get_image_num_mip_levels(read_write image2d_t image);\n"
41755"int __ovld get_image_num_mip_levels(read_write image3d_t image);\n"
41756"\n"
41757"int __ovld get_image_num_mip_levels(read_only image1d_array_t image);\n"
41758"int __ovld get_image_num_mip_levels(read_only image2d_array_t image);\n"
41759"int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);\n"
41760"int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);\n"
41761"\n"
41762"int __ovld get_image_num_mip_levels(write_only image1d_array_t image);\n"
41763"int __ovld get_image_num_mip_levels(write_only image2d_array_t image);\n"
41764"int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);\n"
41765"int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);\n"
41766"\n"
41767"int __ovld get_image_num_mip_levels(read_write image1d_array_t image);\n"
41768"int __ovld get_image_num_mip_levels(read_write image2d_array_t image);\n"
41769"int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);\n"
41770"int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);\n"
41771"\n"
41772"#endif //cl_khr_mipmap_image\n"
41773"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41774"\n"
41775"/**\n"
41776" * Return the channel data type. Valid values are:\n"
41777" * CLK_SNORM_INT8\n"
41778" * CLK_SNORM_INT16\n"
41779" * CLK_UNORM_INT8\n"
41780" * CLK_UNORM_INT16\n"
41781" * CLK_UNORM_SHORT_565\n"
41782" * CLK_UNORM_SHORT_555\n"
41783" * CLK_UNORM_SHORT_101010\n"
41784" * CLK_SIGNED_INT8\n"
41785" * CLK_SIGNED_INT16\n"
41786" * CLK_SIGNED_INT32\n"
41787" * CLK_UNSIGNED_INT8\n"
41788" * CLK_UNSIGNED_INT16\n"
41789" * CLK_UNSIGNED_INT32\n"
41790" * CLK_HALF_FLOAT\n"
41791" * CLK_FLOAT\n"
41792" */\n"
41793"\n"
41794"//\n"
41795"// Channel Datatype.\n"
41796"//\n"
41797"#define CLK_SNORM_INT8 0x10D0\n"
41798"#define CLK_SNORM_INT16 0x10D1\n"
41799"#define CLK_UNORM_INT8 0x10D2\n"
41800"#define CLK_UNORM_INT16 0x10D3\n"
41801"#define CLK_UNORM_SHORT_565 0x10D4\n"
41802"#define CLK_UNORM_SHORT_555 0x10D5\n"
41803"#define CLK_UNORM_INT_101010 0x10D6\n"
41804"#define CLK_SIGNED_INT8 0x10D7\n"
41805"#define CLK_SIGNED_INT16 0x10D8\n"
41806"#define CLK_SIGNED_INT32 0x10D9\n"
41807"#define CLK_UNSIGNED_INT8 0x10DA\n"
41808"#define CLK_UNSIGNED_INT16 0x10DB\n"
41809"#define CLK_UNSIGNED_INT32 0x10DC\n"
41810"#define CLK_HALF_FLOAT 0x10DD\n"
41811"#define CLK_FLOAT 0x10DE\n"
41812"#define CLK_UNORM_INT24 0x10DF\n"
41813"\n"
41814"int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);\n"
41815"int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);\n"
41816"int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);\n"
41817"int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);\n"
41818"int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);\n"
41819"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);\n"
41820"#ifdef cl_khr_depth_images\n"
41821"int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);\n"
41822"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);\n"
41823"#endif //cl_khr_depth_images\n"
41824"#if defined(cl_khr_gl_msaa_sharing)\n"
41825"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);\n"
41826"int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);\n"
41827"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);\n"
41828"int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);\n"
41829"#endif //cl_khr_gl_msaa_sharing\n"
41830"\n"
41831"int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);\n"
41832"int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);\n"
41833"int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);\n"
41834"#ifdef cl_khr_3d_image_writes\n"
41835"int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);\n"
41836"#endif\n"
41837"int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);\n"
41838"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);\n"
41839"#ifdef cl_khr_depth_images\n"
41840"int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);\n"
41841"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);\n"
41842"#endif //cl_khr_depth_images\n"
41843"#if defined(cl_khr_gl_msaa_sharing)\n"
41844"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);\n"
41845"int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);\n"
41846"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);\n"
41847"int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);\n"
41848"#endif //cl_khr_gl_msaa_sharing\n"
41849"\n"
41850"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41851"int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);\n"
41852"int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);\n"
41853"int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);\n"
41854"int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);\n"
41855"int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);\n"
41856"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);\n"
41857"#ifdef cl_khr_depth_images\n"
41858"int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);\n"
41859"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);\n"
41860"#endif //cl_khr_depth_images\n"
41861"#if defined(cl_khr_gl_msaa_sharing)\n"
41862"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);\n"
41863"int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);\n"
41864"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);\n"
41865"int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);\n"
41866"#endif //cl_khr_gl_msaa_sharing\n"
41867"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41868"\n"
41869"/**\n"
41870" * Return the image channel order. Valid values are:\n"
41871" * CLK_A\n"
41872" * CLK_R\n"
41873" * CLK_Rx\n"
41874" * CLK_RG\n"
41875" * CLK_RGx\n"
41876" * CLK_RA\n"
41877" * CLK_RGB\n"
41878" * CLK_RGBx\n"
41879" * CLK_RGBA\n"
41880" * CLK_ARGB\n"
41881" * CLK_BGRA\n"
41882" * CLK_INTENSITY\n"
41883" * CLK_LUMINANCE\n"
41884" */\n"
41885"// Channel order, numbering must be aligned with cl_channel_order in cl.h\n"
41886"//\n"
41887"#define CLK_R 0x10B0\n"
41888"#define CLK_A 0x10B1\n"
41889"#define CLK_RG 0x10B2\n"
41890"#define CLK_RA 0x10B3\n"
41891"#define CLK_RGB 0x10B4\n"
41892"#define CLK_RGBA 0x10B5\n"
41893"#define CLK_BGRA 0x10B6\n"
41894"#define CLK_ARGB 0x10B7\n"
41895"#define CLK_INTENSITY 0x10B8\n"
41896"#define CLK_LUMINANCE 0x10B9\n"
41897"#define CLK_Rx 0x10BA\n"
41898"#define CLK_RGx 0x10BB\n"
41899"#define CLK_RGBx 0x10BC\n"
41900"#define CLK_DEPTH 0x10BD\n"
41901"#define CLK_DEPTH_STENCIL 0x10BE\n"
41902"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41903"#define CLK_sRGB 0x10BF\n"
41904"#define CLK_sRGBx 0x10C0\n"
41905"#define CLK_sRGBA 0x10C1\n"
41906"#define CLK_sBGRA 0x10C2\n"
41907"#define CLK_ABGR 0x10C3\n"
41908"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41909"\n"
41910"int __ovld __cnfn get_image_channel_order(read_only image1d_t image);\n"
41911"int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);\n"
41912"int __ovld __cnfn get_image_channel_order(read_only image2d_t image);\n"
41913"int __ovld __cnfn get_image_channel_order(read_only image3d_t image);\n"
41914"int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);\n"
41915"int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);\n"
41916"#ifdef cl_khr_depth_images\n"
41917"int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);\n"
41918"int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);\n"
41919"#endif //cl_khr_depth_images\n"
41920"#if defined(cl_khr_gl_msaa_sharing)\n"
41921"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);\n"
41922"int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);\n"
41923"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);\n"
41924"int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);\n"
41925"#endif //cl_khr_gl_msaa_sharing\n"
41926"\n"
41927"int __ovld __cnfn get_image_channel_order(write_only image1d_t image);\n"
41928"int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);\n"
41929"int __ovld __cnfn get_image_channel_order(write_only image2d_t image);\n"
41930"#ifdef cl_khr_3d_image_writes\n"
41931"int __ovld __cnfn get_image_channel_order(write_only image3d_t image);\n"
41932"#endif\n"
41933"int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);\n"
41934"int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);\n"
41935"#ifdef cl_khr_depth_images\n"
41936"int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);\n"
41937"int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);\n"
41938"#endif //cl_khr_depth_images\n"
41939"#if defined(cl_khr_gl_msaa_sharing)\n"
41940"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);\n"
41941"int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);\n"
41942"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);\n"
41943"int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);\n"
41944"#endif //cl_khr_gl_msaa_sharing\n"
41945"\n"
41946"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41947"int __ovld __cnfn get_image_channel_order(read_write image1d_t image);\n"
41948"int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);\n"
41949"int __ovld __cnfn get_image_channel_order(read_write image2d_t image);\n"
41950"int __ovld __cnfn get_image_channel_order(read_write image3d_t image);\n"
41951"int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);\n"
41952"int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);\n"
41953"#ifdef cl_khr_depth_images\n"
41954"int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);\n"
41955"int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);\n"
41956"#endif //cl_khr_depth_images\n"
41957"#if defined(cl_khr_gl_msaa_sharing)\n"
41958"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);\n"
41959"int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);\n"
41960"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);\n"
41961"int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);\n"
41962"#endif //cl_khr_gl_msaa_sharing\n"
41963"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41964"\n"
41965"/**\n"
41966" * Return the 2D image width and height as an int2\n"
41967" * type. The width is returned in the x component, and\n"
41968" * the height in the y component.\n"
41969" */\n"
41970"int2 __ovld __cnfn get_image_dim(read_only image2d_t image);\n"
41971"int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);\n"
41972"#ifdef cl_khr_depth_images\n"
41973"int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);\n"
41974"int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);\n"
41975"#endif //cl_khr_depth_images\n"
41976"#if defined(cl_khr_gl_msaa_sharing)\n"
41977"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);\n"
41978"int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);\n"
41979"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);\n"
41980"int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);\n"
41981"#endif //cl_khr_gl_msaa_sharing\n"
41982"\n"
41983"int2 __ovld __cnfn get_image_dim(write_only image2d_t image);\n"
41984"int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);\n"
41985"#ifdef cl_khr_depth_images\n"
41986"int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);\n"
41987"int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);\n"
41988"#endif //cl_khr_depth_images\n"
41989"#if defined(cl_khr_gl_msaa_sharing)\n"
41990"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);\n"
41991"int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);\n"
41992"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);\n"
41993"int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);\n"
41994"#endif //cl_khr_gl_msaa_sharing\n"
41995"\n"
41996"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
41997"int2 __ovld __cnfn get_image_dim(read_write image2d_t image);\n"
41998"int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);\n"
41999"#ifdef cl_khr_depth_images\n"
42000"int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);\n"
42001"int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);\n"
42002"#endif //cl_khr_depth_images\n"
42003"#if defined(cl_khr_gl_msaa_sharing)\n"
42004"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);\n"
42005"int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);\n"
42006"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);\n"
42007"int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);\n"
42008"#endif //cl_khr_gl_msaa_sharing\n"
42009"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42010"\n"
42011"/**\n"
42012" * Return the 3D image width, height, and depth as an\n"
42013" * int4 type. The width is returned in the x\n"
42014" * component, height in the y component, depth in the z\n"
42015" * component and the w component is 0.\n"
42016" */\n"
42017"int4 __ovld __cnfn get_image_dim(read_only image3d_t image);\n"
42018"#ifdef cl_khr_3d_image_writes\n"
42019"int4 __ovld __cnfn get_image_dim(write_only image3d_t image);\n"
42020"#endif\n"
42021"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42022"int4 __ovld __cnfn get_image_dim(read_write image3d_t image);\n"
42023"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42024"\n"
42025"/**\n"
42026" * Return the image array size.\n"
42027" */\n"
42028"\n"
42029"size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);\n"
42030"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);\n"
42031"#ifdef cl_khr_depth_images\n"
42032"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);\n"
42033"#endif //cl_khr_depth_images\n"
42034"#if defined(cl_khr_gl_msaa_sharing)\n"
42035"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);\n"
42036"size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);\n"
42037"#endif //cl_khr_gl_msaa_sharing\n"
42038"\n"
42039"size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);\n"
42040"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);\n"
42041"#ifdef cl_khr_depth_images\n"
42042"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);\n"
42043"#endif //cl_khr_depth_images\n"
42044"#if defined(cl_khr_gl_msaa_sharing)\n"
42045"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);\n"
42046"size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);\n"
42047"#endif //cl_khr_gl_msaa_sharing\n"
42048"\n"
42049"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42050"size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);\n"
42051"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);\n"
42052"#ifdef cl_khr_depth_images\n"
42053"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);\n"
42054"#endif //cl_khr_depth_images\n"
42055"#if defined(cl_khr_gl_msaa_sharing)\n"
42056"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);\n"
42057"size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);\n"
42058"#endif //cl_khr_gl_msaa_sharing\n"
42059"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42060"\n"
42061"/**\n"
42062"* Return the number of samples associated with image\n"
42063"*/\n"
42064"#if defined(cl_khr_gl_msaa_sharing)\n"
42065"int __ovld get_image_num_samples(read_only image2d_msaa_t image);\n"
42066"int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);\n"
42067"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
42068"int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);\n"
42069"int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n"
42070"\n"
42071"int __ovld get_image_num_samples(write_only image2d_msaa_t image);\n"
42072"int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);\n"
42073"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
42074"int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);\n"
42075"int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n"
42076"\n"
42077"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42078"int __ovld get_image_num_samples(read_write image2d_msaa_t image);\n"
42079"int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);\n"
42080"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
42081"int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);\n"
42082"int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n"
42083"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42084"#endif\n"
42085"\n"
42086"// OpenCL v2.0 s6.13.15 - Work-group Functions\n"
42087"\n"
42088"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42089"int __ovld __conv work_group_all(int predicate);\n"
42090"int __ovld __conv work_group_any(int predicate);\n"
42091"\n"
42092"#ifdef cl_khr_fp16\n"
42093"half __ovld __conv work_group_broadcast(half a, size_t local_id);\n"
42094"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);\n"
42095"half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);\n"
42096"#endif\n"
42097"int __ovld __conv work_group_broadcast(int a, size_t local_id);\n"
42098"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);\n"
42099"int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);\n"
42100"uint __ovld __conv work_group_broadcast(uint a, size_t local_id);\n"
42101"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);\n"
42102"uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);\n"
42103"long __ovld __conv work_group_broadcast(long a, size_t local_id);\n"
42104"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);\n"
42105"long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);\n"
42106"ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);\n"
42107"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);\n"
42108"ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);\n"
42109"float __ovld __conv work_group_broadcast(float a, size_t local_id);\n"
42110"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);\n"
42111"float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);\n"
42112"#ifdef cl_khr_fp64\n"
42113"double __ovld __conv work_group_broadcast(double a, size_t local_id);\n"
42114"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);\n"
42115"double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);\n"
42116"#endif //cl_khr_fp64\n"
42117"\n"
42118"#ifdef cl_khr_fp16\n"
42119"half __ovld __conv work_group_reduce_add(half x);\n"
42120"half __ovld __conv work_group_reduce_min(half x);\n"
42121"half __ovld __conv work_group_reduce_max(half x);\n"
42122"half __ovld __conv work_group_scan_exclusive_add(half x);\n"
42123"half __ovld __conv work_group_scan_exclusive_min(half x);\n"
42124"half __ovld __conv work_group_scan_exclusive_max(half x);\n"
42125"half __ovld __conv work_group_scan_inclusive_add(half x);\n"
42126"half __ovld __conv work_group_scan_inclusive_min(half x);\n"
42127"half __ovld __conv work_group_scan_inclusive_max(half x);\n"
42128"#endif\n"
42129"int __ovld __conv work_group_reduce_add(int x);\n"
42130"int __ovld __conv work_group_reduce_min(int x);\n"
42131"int __ovld __conv work_group_reduce_max(int x);\n"
42132"int __ovld __conv work_group_scan_exclusive_add(int x);\n"
42133"int __ovld __conv work_group_scan_exclusive_min(int x);\n"
42134"int __ovld __conv work_group_scan_exclusive_max(int x);\n"
42135"int __ovld __conv work_group_scan_inclusive_add(int x);\n"
42136"int __ovld __conv work_group_scan_inclusive_min(int x);\n"
42137"int __ovld __conv work_group_scan_inclusive_max(int x);\n"
42138"uint __ovld __conv work_group_reduce_add(uint x);\n"
42139"uint __ovld __conv work_group_reduce_min(uint x);\n"
42140"uint __ovld __conv work_group_reduce_max(uint x);\n"
42141"uint __ovld __conv work_group_scan_exclusive_add(uint x);\n"
42142"uint __ovld __conv work_group_scan_exclusive_min(uint x);\n"
42143"uint __ovld __conv work_group_scan_exclusive_max(uint x);\n"
42144"uint __ovld __conv work_group_scan_inclusive_add(uint x);\n"
42145"uint __ovld __conv work_group_scan_inclusive_min(uint x);\n"
42146"uint __ovld __conv work_group_scan_inclusive_max(uint x);\n"
42147"long __ovld __conv work_group_reduce_add(long x);\n"
42148"long __ovld __conv work_group_reduce_min(long x);\n"
42149"long __ovld __conv work_group_reduce_max(long x);\n"
42150"long __ovld __conv work_group_scan_exclusive_add(long x);\n"
42151"long __ovld __conv work_group_scan_exclusive_min(long x);\n"
42152"long __ovld __conv work_group_scan_exclusive_max(long x);\n"
42153"long __ovld __conv work_group_scan_inclusive_add(long x);\n"
42154"long __ovld __conv work_group_scan_inclusive_min(long x);\n"
42155"long __ovld __conv work_group_scan_inclusive_max(long x);\n"
42156"ulong __ovld __conv work_group_reduce_add(ulong x);\n"
42157"ulong __ovld __conv work_group_reduce_min(ulong x);\n"
42158"ulong __ovld __conv work_group_reduce_max(ulong x);\n"
42159"ulong __ovld __conv work_group_scan_exclusive_add(ulong x);\n"
42160"ulong __ovld __conv work_group_scan_exclusive_min(ulong x);\n"
42161"ulong __ovld __conv work_group_scan_exclusive_max(ulong x);\n"
42162"ulong __ovld __conv work_group_scan_inclusive_add(ulong x);\n"
42163"ulong __ovld __conv work_group_scan_inclusive_min(ulong x);\n"
42164"ulong __ovld __conv work_group_scan_inclusive_max(ulong x);\n"
42165"float __ovld __conv work_group_reduce_add(float x);\n"
42166"float __ovld __conv work_group_reduce_min(float x);\n"
42167"float __ovld __conv work_group_reduce_max(float x);\n"
42168"float __ovld __conv work_group_scan_exclusive_add(float x);\n"
42169"float __ovld __conv work_group_scan_exclusive_min(float x);\n"
42170"float __ovld __conv work_group_scan_exclusive_max(float x);\n"
42171"float __ovld __conv work_group_scan_inclusive_add(float x);\n"
42172"float __ovld __conv work_group_scan_inclusive_min(float x);\n"
42173"float __ovld __conv work_group_scan_inclusive_max(float x);\n"
42174"#ifdef cl_khr_fp64\n"
42175"double __ovld __conv work_group_reduce_add(double x);\n"
42176"double __ovld __conv work_group_reduce_min(double x);\n"
42177"double __ovld __conv work_group_reduce_max(double x);\n"
42178"double __ovld __conv work_group_scan_exclusive_add(double x);\n"
42179"double __ovld __conv work_group_scan_exclusive_min(double x);\n"
42180"double __ovld __conv work_group_scan_exclusive_max(double x);\n"
42181"double __ovld __conv work_group_scan_inclusive_add(double x);\n"
42182"double __ovld __conv work_group_scan_inclusive_min(double x);\n"
42183"double __ovld __conv work_group_scan_inclusive_max(double x);\n"
42184"#endif //cl_khr_fp64\n"
42185"\n"
42186"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42187"\n"
42188"// OpenCL v2.0 s6.13.16 - Pipe Functions\n"
42189"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42190"#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)\n"
42191"#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))\n"
42192"bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);\n"
42193"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42194"\n"
42195"\n"
42196"// OpenCL v2.0 s6.13.17 - Enqueue Kernels\n"
42197"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42198"\n"
42199"#define CL_COMPLETE 0x0\n"
42200"#define CL_RUNNING 0x1\n"
42201"#define CL_SUBMITTED 0x2\n"
42202"#define CL_QUEUED 0x3\n"
42203"\n"
42204"#define CLK_SUCCESS 0\n"
42205"#define CLK_ENQUEUE_FAILURE -101\n"
42206"#define CLK_INVALID_QUEUE -102\n"
42207"#define CLK_INVALID_NDRANGE -160\n"
42208"#define CLK_INVALID_EVENT_WAIT_LIST -57\n"
42209"#define CLK_DEVICE_QUEUE_FULL -161\n"
42210"#define CLK_INVALID_ARG_SIZE -51\n"
42211"#define CLK_EVENT_ALLOCATION_FAILURE -100\n"
42212"#define CLK_OUT_OF_RESOURCES -5\n"
42213"\n"
42214"#define CLK_NULL_QUEUE 0\n"
42215"#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))\n"
42216"\n"
42217"// execution model related definitions\n"
42218"#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0\n"
42219"#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1\n"
42220"#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2\n"
42221"\n"
42222"typedef int kernel_enqueue_flags_t;\n"
42223"typedef int clk_profiling_info;\n"
42224"\n"
42225"// Profiling info name (see capture_event_profiling_info)\n"
42226"#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1\n"
42227"\n"
42228"#define MAX_WORK_DIM 3\n"
42229"\n"
42230"typedef struct {\n"
42231" unsigned int workDimension;\n"
42232" size_t globalWorkOffset[MAX_WORK_DIM];\n"
42233" size_t globalWorkSize[MAX_WORK_DIM];\n"
42234" size_t localWorkSize[MAX_WORK_DIM];\n"
42235"} ndrange_t;\n"
42236"\n"
42237"ndrange_t __ovld ndrange_1D(size_t);\n"
42238"ndrange_t __ovld ndrange_1D(size_t, size_t);\n"
42239"ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);\n"
42240"\n"
42241"ndrange_t __ovld ndrange_2D(const size_t[2]);\n"
42242"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);\n"
42243"ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);\n"
42244"\n"
42245"ndrange_t __ovld ndrange_3D(const size_t[3]);\n"
42246"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);\n"
42247"ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);\n"
42248"\n"
42249"int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);\n"
42250"\n"
42251"void __ovld retain_event(clk_event_t);\n"
42252"\n"
42253"void __ovld release_event(clk_event_t);\n"
42254"\n"
42255"clk_event_t __ovld create_user_event(void);\n"
42256"\n"
42257"void __ovld set_user_event_status(clk_event_t e, int state);\n"
42258"\n"
42259"bool __ovld is_valid_event (clk_event_t event);\n"
42260"\n"
42261"void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);\n"
42262"\n"
42263"queue_t __ovld get_default_queue(void);\n"
42264"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42265"\n"
42266"// OpenCL Extension v2.0 s9.17 - Sub-groups\n"
42267"\n"
42268"#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n"
42269"// Shared Sub Group Functions\n"
42270"uint __ovld get_sub_group_size(void);\n"
42271"uint __ovld get_max_sub_group_size(void);\n"
42272"uint __ovld get_num_sub_groups(void);\n"
42273"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42274"uint __ovld get_enqueued_num_sub_groups(void);\n"
42275"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42276"uint __ovld get_sub_group_id(void);\n"
42277"uint __ovld get_sub_group_local_id(void);\n"
42278"\n"
42279"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);\n"
42280"#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42281"void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n"
42282"#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n"
42283"\n"
42284"int __ovld __conv sub_group_all(int predicate);\n"
42285"int __ovld __conv sub_group_any(int predicate);\n"
42286"\n"
42287"int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);\n"
42288"uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);\n"
42289"long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);\n"
42290"ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);\n"
42291"float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);\n"
42292"\n"
42293"int __ovld __conv sub_group_reduce_add(int x);\n"
42294"uint __ovld __conv sub_group_reduce_add(uint x);\n"
42295"long __ovld __conv sub_group_reduce_add(long x);\n"
42296"ulong __ovld __conv sub_group_reduce_add(ulong x);\n"
42297"float __ovld __conv sub_group_reduce_add(float x);\n"
42298"int __ovld __conv sub_group_reduce_min(int x);\n"
42299"uint __ovld __conv sub_group_reduce_min(uint x);\n"
42300"long __ovld __conv sub_group_reduce_min(long x);\n"
42301"ulong __ovld __conv sub_group_reduce_min(ulong x);\n"
42302"float __ovld __conv sub_group_reduce_min(float x);\n"
42303"int __ovld __conv sub_group_reduce_max(int x);\n"
42304"uint __ovld __conv sub_group_reduce_max(uint x);\n"
42305"long __ovld __conv sub_group_reduce_max(long x);\n"
42306"ulong __ovld __conv sub_group_reduce_max(ulong x);\n"
42307"float __ovld __conv sub_group_reduce_max(float x);\n"
42308"\n"
42309"int __ovld __conv sub_group_scan_exclusive_add(int x);\n"
42310"uint __ovld __conv sub_group_scan_exclusive_add(uint x);\n"
42311"long __ovld __conv sub_group_scan_exclusive_add(long x);\n"
42312"ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);\n"
42313"float __ovld __conv sub_group_scan_exclusive_add(float x);\n"
42314"int __ovld __conv sub_group_scan_exclusive_min(int x);\n"
42315"uint __ovld __conv sub_group_scan_exclusive_min(uint x);\n"
42316"long __ovld __conv sub_group_scan_exclusive_min(long x);\n"
42317"ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);\n"
42318"float __ovld __conv sub_group_scan_exclusive_min(float x);\n"
42319"int __ovld __conv sub_group_scan_exclusive_max(int x);\n"
42320"uint __ovld __conv sub_group_scan_exclusive_max(uint x);\n"
42321"long __ovld __conv sub_group_scan_exclusive_max(long x);\n"
42322"ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);\n"
42323"float __ovld __conv sub_group_scan_exclusive_max(float x);\n"
42324"\n"
42325"int __ovld __conv sub_group_scan_inclusive_add(int x);\n"
42326"uint __ovld __conv sub_group_scan_inclusive_add(uint x);\n"
42327"long __ovld __conv sub_group_scan_inclusive_add(long x);\n"
42328"ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);\n"
42329"float __ovld __conv sub_group_scan_inclusive_add(float x);\n"
42330"int __ovld __conv sub_group_scan_inclusive_min(int x);\n"
42331"uint __ovld __conv sub_group_scan_inclusive_min(uint x);\n"
42332"long __ovld __conv sub_group_scan_inclusive_min(long x);\n"
42333"ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);\n"
42334"float __ovld __conv sub_group_scan_inclusive_min(float x);\n"
42335"int __ovld __conv sub_group_scan_inclusive_max(int x);\n"
42336"uint __ovld __conv sub_group_scan_inclusive_max(uint x);\n"
42337"long __ovld __conv sub_group_scan_inclusive_max(long x);\n"
42338"ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);\n"
42339"float __ovld __conv sub_group_scan_inclusive_max(float x);\n"
42340"\n"
42341"#ifdef cl_khr_fp16\n"
42342"half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);\n"
42343"half __ovld __conv sub_group_reduce_add(half x);\n"
42344"half __ovld __conv sub_group_reduce_min(half x);\n"
42345"half __ovld __conv sub_group_reduce_max(half x);\n"
42346"half __ovld __conv sub_group_scan_exclusive_add(half x);\n"
42347"half __ovld __conv sub_group_scan_exclusive_min(half x);\n"
42348"half __ovld __conv sub_group_scan_exclusive_max(half x);\n"
42349"half __ovld __conv sub_group_scan_inclusive_add(half x);\n"
42350"half __ovld __conv sub_group_scan_inclusive_min(half x);\n"
42351"half __ovld __conv sub_group_scan_inclusive_max(half x);\n"
42352"#endif //cl_khr_fp16\n"
42353"\n"
42354"#ifdef cl_khr_fp64\n"
42355"double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);\n"
42356"double __ovld __conv sub_group_reduce_add(double x);\n"
42357"double __ovld __conv sub_group_reduce_min(double x);\n"
42358"double __ovld __conv sub_group_reduce_max(double x);\n"
42359"double __ovld __conv sub_group_scan_exclusive_add(double x);\n"
42360"double __ovld __conv sub_group_scan_exclusive_min(double x);\n"
42361"double __ovld __conv sub_group_scan_exclusive_max(double x);\n"
42362"double __ovld __conv sub_group_scan_inclusive_add(double x);\n"
42363"double __ovld __conv sub_group_scan_inclusive_min(double x);\n"
42364"double __ovld __conv sub_group_scan_inclusive_max(double x);\n"
42365"#endif //cl_khr_fp64\n"
42366"\n"
42367"#endif //cl_khr_subgroups cl_intel_subgroups\n"
42368"\n"
42369"#if defined(cl_intel_subgroups)\n"
42370"// Intel-Specific Sub Group Functions\n"
42371"float __ovld __conv intel_sub_group_shuffle( float x, uint c );\n"
42372"float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );\n"
42373"float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );\n"
42374"float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );\n"
42375"float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );\n"
42376"float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );\n"
42377"\n"
42378"int __ovld __conv intel_sub_group_shuffle( int x, uint c );\n"
42379"int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );\n"
42380"int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );\n"
42381"int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );\n"
42382"int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );\n"
42383"int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );\n"
42384"\n"
42385"uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );\n"
42386"uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );\n"
42387"uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );\n"
42388"uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );\n"
42389"uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );\n"
42390"uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );\n"
42391"\n"
42392"long __ovld __conv intel_sub_group_shuffle( long x, uint c );\n"
42393"ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );\n"
42394"\n"
42395"float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );\n"
42396"float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );\n"
42397"float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );\n"
42398"float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );\n"
42399"float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );\n"
42400"float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );\n"
42401"\n"
42402"int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );\n"
42403"int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );\n"
42404"int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );\n"
42405"int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );\n"
42406"int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );\n"
42407"int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );\n"
42408"\n"
42409"uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );\n"
42410"uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );\n"
42411"uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );\n"
42412"uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );\n"
42413"uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );\n"
42414"uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );\n"
42415"\n"
42416"long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );\n"
42417"ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );\n"
42418"\n"
42419"float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );\n"
42420"float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );\n"
42421"float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );\n"
42422"float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );\n"
42423"float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );\n"
42424"float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );\n"
42425"\n"
42426"int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );\n"
42427"int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );\n"
42428"int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );\n"
42429"int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );\n"
42430"int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );\n"
42431"int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );\n"
42432"\n"
42433"uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );\n"
42434"uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );\n"
42435"uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );\n"
42436"uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );\n"
42437"uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );\n"
42438"uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );\n"
42439"\n"
42440"long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );\n"
42441"ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );\n"
42442"\n"
42443"float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );\n"
42444"float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );\n"
42445"float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );\n"
42446"float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );\n"
42447"float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );\n"
42448"float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );\n"
42449"\n"
42450"int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );\n"
42451"int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );\n"
42452"int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );\n"
42453"int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );\n"
42454"int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );\n"
42455"int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );\n"
42456"\n"
42457"uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );\n"
42458"uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );\n"
42459"uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );\n"
42460"uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );\n"
42461"uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );\n"
42462"uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );\n"
42463"\n"
42464"long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );\n"
42465"ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );\n"
42466"\n"
42467"uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );\n"
42468"uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );\n"
42469"uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );\n"
42470"uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );\n"
42471"\n"
42472"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42473"uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);\n"
42474"uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);\n"
42475"uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);\n"
42476"uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);\n"
42477"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42478"\n"
42479"uint __ovld __conv intel_sub_group_block_read( const __global uint* p );\n"
42480"uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );\n"
42481"uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );\n"
42482"uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );\n"
42483"\n"
42484"void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);\n"
42485"void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);\n"
42486"void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);\n"
42487"void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);\n"
42488"\n"
42489"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42490"void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);\n"
42491"void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);\n"
42492"void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);\n"
42493"void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);\n"
42494"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42495"\n"
42496"void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );\n"
42497"void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );\n"
42498"void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );\n"
42499"void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );\n"
42500"\n"
42501"#ifdef cl_khr_fp16\n"
42502"half __ovld __conv intel_sub_group_shuffle( half x, uint c );\n"
42503"half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );\n"
42504"half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );\n"
42505"half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );\n"
42506"#endif\n"
42507"\n"
42508"#if defined(cl_khr_fp64)\n"
42509"double __ovld __conv intel_sub_group_shuffle( double x, uint c );\n"
42510"double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );\n"
42511"double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );\n"
42512"double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );\n"
42513"#endif\n"
42514"\n"
42515"#endif //cl_intel_subgroups\n"
42516"\n"
42517"#if defined(cl_intel_subgroups_short)\n"
42518"short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );\n"
42519"short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );\n"
42520"short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );\n"
42521"short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );\n"
42522"short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );\n"
42523"\n"
42524"ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );\n"
42525"ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );\n"
42526"ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );\n"
42527"ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );\n"
42528"ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );\n"
42529"\n"
42530"short __ovld __conv intel_sub_group_shuffle( short x, uint c );\n"
42531"short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );\n"
42532"short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );\n"
42533"short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );\n"
42534"short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );\n"
42535"short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);\n"
42536"\n"
42537"ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );\n"
42538"ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );\n"
42539"ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );\n"
42540"ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );\n"
42541"ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );\n"
42542"ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );\n"
42543"\n"
42544"short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );\n"
42545"short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );\n"
42546"short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );\n"
42547"short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );\n"
42548"short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );\n"
42549"short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );\n"
42550"\n"
42551"ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );\n"
42552"ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );\n"
42553"ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );\n"
42554"ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );\n"
42555"ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );\n"
42556"ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );\n"
42557"\n"
42558"short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );\n"
42559"short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );\n"
42560"short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );\n"
42561"short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );\n"
42562"short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );\n"
42563"short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );\n"
42564"\n"
42565"ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );\n"
42566"ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );\n"
42567"ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );\n"
42568"ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );\n"
42569"ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );\n"
42570"ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );\n"
42571"\n"
42572"short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );\n"
42573"short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );\n"
42574"short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );\n"
42575"short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );\n"
42576"short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );\n"
42577"short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );\n"
42578"\n"
42579"ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );\n"
42580"ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );\n"
42581"ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );\n"
42582"ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );\n"
42583"ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );\n"
42584"ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );\n"
42585"\n"
42586"short __ovld __conv intel_sub_group_reduce_add( short x );\n"
42587"ushort __ovld __conv intel_sub_group_reduce_add( ushort x );\n"
42588"short __ovld __conv intel_sub_group_reduce_min( short x );\n"
42589"ushort __ovld __conv intel_sub_group_reduce_min( ushort x );\n"
42590"short __ovld __conv intel_sub_group_reduce_max( short x );\n"
42591"ushort __ovld __conv intel_sub_group_reduce_max( ushort x );\n"
42592"\n"
42593"short __ovld __conv intel_sub_group_scan_exclusive_add( short x );\n"
42594"ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );\n"
42595"short __ovld __conv intel_sub_group_scan_exclusive_min( short x );\n"
42596"ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );\n"
42597"short __ovld __conv intel_sub_group_scan_exclusive_max( short x );\n"
42598"ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );\n"
42599"\n"
42600"short __ovld __conv intel_sub_group_scan_inclusive_add( short x );\n"
42601"ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );\n"
42602"short __ovld __conv intel_sub_group_scan_inclusive_min( short x );\n"
42603"ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );\n"
42604"short __ovld __conv intel_sub_group_scan_inclusive_max( short x );\n"
42605"ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );\n"
42606"\n"
42607"uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );\n"
42608"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );\n"
42609"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );\n"
42610"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );\n"
42611"\n"
42612"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42613"uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );\n"
42614"uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );\n"
42615"uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );\n"
42616"uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );\n"
42617"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42618"\n"
42619"uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );\n"
42620"uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );\n"
42621"uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );\n"
42622"uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );\n"
42623"\n"
42624"void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );\n"
42625"void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );\n"
42626"void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );\n"
42627"void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );\n"
42628"\n"
42629"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42630"void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );\n"
42631"void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );\n"
42632"void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );\n"
42633"void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );\n"
42634"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42635"\n"
42636"void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );\n"
42637"void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );\n"
42638"void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );\n"
42639"void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );\n"
42640"\n"
42641"ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );\n"
42642"ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );\n"
42643"ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );\n"
42644"ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );\n"
42645"\n"
42646"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42647"ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);\n"
42648"ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);\n"
42649"ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);\n"
42650"ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);\n"
42651"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42652"\n"
42653"ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );\n"
42654"ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );\n"
42655"ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );\n"
42656"ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );\n"
42657"\n"
42658"void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);\n"
42659"void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);\n"
42660"void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);\n"
42661"void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);\n"
42662"\n"
42663"#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42664"void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);\n"
42665"void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);\n"
42666"void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);\n"
42667"void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);\n"
42668"#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n"
42669"\n"
42670"void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );\n"
42671"void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );\n"
42672"void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );\n"
42673"void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );\n"
42674"#endif // cl_intel_subgroups_short\n"
42675"\n"
42676"#ifdef cl_amd_media_ops\n"
42677"uint __ovld amd_bitalign(uint a, uint b, uint c);\n"
42678"uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);\n"
42679"uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c);\n"
42680"uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c);\n"
42681"uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c);\n"
42682"uint16 __ovld amd_bitalign(uint16 a, uint16 b, uint16 c);\n"
42683"\n"
42684"uint __ovld amd_bytealign(uint a, uint b, uint c);\n"
42685"uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c);\n"
42686"uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c);\n"
42687"uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c);\n"
42688"uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c);\n"
42689"uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c);\n"
42690"\n"
42691"uint __ovld amd_lerp(uint a, uint b, uint c);\n"
42692"uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c);\n"
42693"uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c);\n"
42694"uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c);\n"
42695"uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c);\n"
42696"uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c);\n"
42697"\n"
42698"uint __ovld amd_pack(float4 v);\n"
42699"\n"
42700"uint __ovld amd_sad4(uint4 x, uint4 y, uint z);\n"
42701"\n"
42702"uint __ovld amd_sadhi(uint a, uint b, uint c);\n"
42703"uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c);\n"
42704"uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c);\n"
42705"uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c);\n"
42706"uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c);\n"
42707"uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c);\n"
42708"\n"
42709"uint __ovld amd_sad(uint a, uint b, uint c);\n"
42710"uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c);\n"
42711"uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c);\n"
42712"uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c);\n"
42713"uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c);\n"
42714"uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c);\n"
42715"\n"
42716"float __ovld amd_unpack0(uint a);\n"
42717"float2 __ovld amd_unpack0(uint2 a);\n"
42718"float3 __ovld amd_unpack0(uint3 a);\n"
42719"float4 __ovld amd_unpack0(uint4 a);\n"
42720"float8 __ovld amd_unpack0(uint8 a);\n"
42721"float16 __ovld amd_unpack0(uint16 a);\n"
42722"\n"
42723"float __ovld amd_unpack1(uint a);\n"
42724"float2 __ovld amd_unpack1(uint2 a);\n"
42725"float3 __ovld amd_unpack1(uint3 a);\n"
42726"float4 __ovld amd_unpack1(uint4 a);\n"
42727"float8 __ovld amd_unpack1(uint8 a);\n"
42728"float16 __ovld amd_unpack1(uint16 a);\n"
42729"\n"
42730"float __ovld amd_unpack2(uint a);\n"
42731"float2 __ovld amd_unpack2(uint2 a);\n"
42732"float3 __ovld amd_unpack2(uint3 a);\n"
42733"float4 __ovld amd_unpack2(uint4 a);\n"
42734"float8 __ovld amd_unpack2(uint8 a);\n"
42735"float16 __ovld amd_unpack2(uint16 a);\n"
42736"\n"
42737"float __ovld amd_unpack3(uint a);\n"
42738"float2 __ovld amd_unpack3(uint2 a);\n"
42739"float3 __ovld amd_unpack3(uint3 a);\n"
42740"float4 __ovld amd_unpack3(uint4 a);\n"
42741"float8 __ovld amd_unpack3(uint8 a);\n"
42742"float16 __ovld amd_unpack3(uint16 a);\n"
42743"#endif // cl_amd_media_ops\n"
42744"\n"
42745"#ifdef cl_amd_media_ops2\n"
42746"int __ovld amd_bfe(int src0, uint src1, uint src2);\n"
42747"int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2);\n"
42748"int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2);\n"
42749"int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2);\n"
42750"int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2);\n"
42751"int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2);\n"
42752"\n"
42753"uint __ovld amd_bfe(uint src0, uint src1, uint src2);\n"
42754"uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2);\n"
42755"uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2);\n"
42756"uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2);\n"
42757"uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2);\n"
42758"uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2);\n"
42759"\n"
42760"uint __ovld amd_bfm(uint src0, uint src1);\n"
42761"uint2 __ovld amd_bfm(uint2 src0, uint2 src1);\n"
42762"uint3 __ovld amd_bfm(uint3 src0, uint3 src1);\n"
42763"uint4 __ovld amd_bfm(uint4 src0, uint4 src1);\n"
42764"uint8 __ovld amd_bfm(uint8 src0, uint8 src1);\n"
42765"uint16 __ovld amd_bfm(uint16 src0, uint16 src1);\n"
42766"\n"
42767"float __ovld amd_max3(float src0, float src1, float src2);\n"
42768"float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2);\n"
42769"float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2);\n"
42770"float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2);\n"
42771"float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2);\n"
42772"float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2);\n"
42773"\n"
42774"int __ovld amd_max3(int src0, int src1, int src2);\n"
42775"int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2);\n"
42776"int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2);\n"
42777"int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2);\n"
42778"int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2);\n"
42779"int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2);\n"
42780"\n"
42781"uint __ovld amd_max3(uint src0, uint src1, uint src2);\n"
42782"uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2);\n"
42783"uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2);\n"
42784"uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2);\n"
42785"uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2);\n"
42786"uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2);\n"
42787"\n"
42788"float __ovld amd_median3(float src0, float src1, float src2);\n"
42789"float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2);\n"
42790"float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2);\n"
42791"float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2);\n"
42792"float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2);\n"
42793"float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2);\n"
42794"\n"
42795"int __ovld amd_median3(int src0, int src1, int src2);\n"
42796"int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2);\n"
42797"int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2);\n"
42798"int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2);\n"
42799"int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2);\n"
42800"int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2);\n"
42801"\n"
42802"uint __ovld amd_median3(uint src0, uint src1, uint src2);\n"
42803"uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2);\n"
42804"uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2);\n"
42805"uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2);\n"
42806"uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2);\n"
42807"uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2);\n"
42808"\n"
42809"float __ovld amd_min3(float src0, float src1, float src);\n"
42810"float2 __ovld amd_min3(float2 src0, float2 src1, float2 src);\n"
42811"float3 __ovld amd_min3(float3 src0, float3 src1, float3 src);\n"
42812"float4 __ovld amd_min3(float4 src0, float4 src1, float4 src);\n"
42813"float8 __ovld amd_min3(float8 src0, float8 src1, float8 src);\n"
42814"float16 __ovld amd_min3(float16 src0, float16 src1, float16 src);\n"
42815"\n"
42816"int __ovld amd_min3(int src0, int src1, int src2);\n"
42817"int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2);\n"
42818"int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2);\n"
42819"int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2);\n"
42820"int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2);\n"
42821"int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2);\n"
42822"\n"
42823"uint __ovld amd_min3(uint src0, uint src1, uint src2);\n"
42824"uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2);\n"
42825"uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2);\n"
42826"uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2);\n"
42827"uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2);\n"
42828"uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2);\n"
42829"\n"
42830"ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2);\n"
42831"ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
42832"ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
42833"ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
42834"ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
42835"ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
42836"\n"
42837"ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2);\n"
42838"ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2);\n"
42839"ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2);\n"
42840"ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2);\n"
42841"ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2);\n"
42842"ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2);\n"
42843"\n"
42844"uint __ovld amd_msad(uint src0, uint src1, uint src2);\n"
42845"uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2);\n"
42846"uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2);\n"
42847"uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2);\n"
42848"uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2);\n"
42849"uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2);\n"
42850"\n"
42851"uint __ovld amd_sadd(uint src0, uint src1, uint src2);\n"
42852"uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2);\n"
42853"uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2);\n"
42854"uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2);\n"
42855"uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2);\n"
42856"uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2);\n"
42857"\n"
42858"uint __ovld amd_sadw(uint src0, uint src1, uint src2);\n"
42859"uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2);\n"
42860"uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2);\n"
42861"uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2);\n"
42862"uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);\n"
42863"uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);\n"
42864"#endif // cl_amd_media_ops2\n"
42865"\n"
42866"// Disable any extensions we may have enabled previously.\n"
42867"#pragma OPENCL EXTENSION all : disable\n"
42868"\n"
42869"#undef __cnfn\n"
42870"#undef __ovld\n"
42871"#endif //_OPENCL_H_\n"
42872"" } ,
42873 { "/builtins/pconfigintrin.h" , "/*===---- pconfigintrin.h - X86 platform configuration ---------------------===\n"
42874" *\n"
42875" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
42876" * of this software and associated documentation files (the \"Software\"), to deal\n"
42877" * in the Software without restriction, including without limitation the rights\n"
42878" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
42879" * copies of the Software, and to permit persons to whom the Software is\n"
42880" * furnished to do so, subject to the following conditions:\n"
42881" *\n"
42882" * The above copyright notice and this permission notice shall be included in\n"
42883" * all copies or substantial portions of the Software.\n"
42884" *\n"
42885" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
42886" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
42887" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
42888" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
42889" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
42890" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
42891" * THE SOFTWARE.\n"
42892" *\n"
42893" *===-----------------------------------------------------------------------===\n"
42894" */\n"
42895"\n"
42896"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
42897"#error \"Never use <pconfigintrin.h> directly; include <x86intrin.h> instead.\"\n"
42898"#endif\n"
42899"\n"
42900"#ifndef __PCONFIGINTRIN_H\n"
42901"#define __PCONFIGINTRIN_H\n"
42902"\n"
42903"#define __PCONFIG_KEY_PROGRAM 0x00000001\n"
42904"\n"
42905"/* Define the default attributes for the functions in this file. */\n"
42906"#define __DEFAULT_FN_ATTRS \\\n"
42907" __attribute__((__always_inline__, __nodebug__, __target__(\"pconfig\")))\n"
42908"\n"
42909"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
42910"_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
42911"{\n"
42912" unsigned int __result;\n"
42913" __asm__ (\"pconfig\"\n"
42914" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
42915" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
42916" : \"cc\");\n"
42917" return __result;\n"
42918"}\n"
42919"\n"
42920"#undef __DEFAULT_FN_ATTRS\n"
42921"\n"
42922"#endif\n"
42923"" } ,
42924 { "/builtins/pkuintrin.h" , "/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===\n"
42925" *\n"
42926" *\n"
42927" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
42928" * of this software and associated documentation files (the \"Software\"), to deal\n"
42929" * in the Software without restriction, including without limitation the rights\n"
42930" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
42931" * copies of the Software, and to permit persons to whom the Software is\n"
42932" * furnished to do so, subject to the following conditions:\n"
42933" *\n"
42934" * The above copyright notice and this permission notice shall be included in\n"
42935" * all copies or substantial portions of the Software.\n"
42936" *\n"
42937" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
42938" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
42939" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
42940" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
42941" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
42942" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
42943" * THE SOFTWARE.\n"
42944" *\n"
42945" *===-----------------------------------------------------------------------===\n"
42946" */\n"
42947"#ifndef __IMMINTRIN_H\n"
42948"#error \"Never use <pkuintrin.h> directly; include <immintrin.h> instead.\"\n"
42949"#endif\n"
42950"\n"
42951"#ifndef __PKUINTRIN_H\n"
42952"#define __PKUINTRIN_H\n"
42953"\n"
42954"/* Define the default attributes for the functions in this file. */\n"
42955"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"pku\")))\n"
42956"\n"
42957"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
42958"_rdpkru_u32(void)\n"
42959"{\n"
42960" return __builtin_ia32_rdpkru();\n"
42961"}\n"
42962"\n"
42963"static __inline__ void __DEFAULT_FN_ATTRS\n"
42964"_wrpkru(unsigned int __val)\n"
42965"{\n"
42966" __builtin_ia32_wrpkru(__val);\n"
42967"}\n"
42968"\n"
42969"#undef __DEFAULT_FN_ATTRS\n"
42970"\n"
42971"#endif\n"
42972"" } ,
42973 { "/builtins/pmmintrin.h" , "/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===\n"
42974" *\n"
42975" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
42976" * of this software and associated documentation files (the \"Software\"), to deal\n"
42977" * in the Software without restriction, including without limitation the rights\n"
42978" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
42979" * copies of the Software, and to permit persons to whom the Software is\n"
42980" * furnished to do so, subject to the following conditions:\n"
42981" *\n"
42982" * The above copyright notice and this permission notice shall be included in\n"
42983" * all copies or substantial portions of the Software.\n"
42984" *\n"
42985" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
42986" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
42987" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
42988" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
42989" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
42990" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
42991" * THE SOFTWARE.\n"
42992" *\n"
42993" *===-----------------------------------------------------------------------===\n"
42994" */\n"
42995"\n"
42996"#ifndef __PMMINTRIN_H\n"
42997"#define __PMMINTRIN_H\n"
42998"\n"
42999"#include <emmintrin.h>\n"
43000"\n"
43001"/* Define the default attributes for the functions in this file. */\n"
43002"#define __DEFAULT_FN_ATTRS \\\n"
43003" __attribute__((__always_inline__, __nodebug__, __target__(\"sse3\"), __min_vector_width__(128)))\n"
43004"\n"
43005"/// Loads data from an unaligned memory location to elements in a 128-bit\n"
43006"/// vector.\n"
43007"///\n"
43008"/// If the address of the data is not 16-byte aligned, the instruction may\n"
43009"/// read two adjacent aligned blocks of memory to retrieve the requested\n"
43010"/// data.\n"
43011"///\n"
43012"/// \\headerfile <x86intrin.h>\n"
43013"///\n"
43014"/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n"
43015"///\n"
43016"/// \\param __p\n"
43017"/// A pointer to a 128-bit integer vector containing integer values.\n"
43018"/// \\returns A 128-bit vector containing the moved values.\n"
43019"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43020"_mm_lddqu_si128(__m128i const *__p)\n"
43021"{\n"
43022" return (__m128i)__builtin_ia32_lddqu((char const *)__p);\n"
43023"}\n"
43024"\n"
43025"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
43026"/// two 128-bit vectors of [4 x float].\n"
43027"///\n"
43028"/// \\headerfile <x86intrin.h>\n"
43029"///\n"
43030"/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n"
43031"///\n"
43032"/// \\param __a\n"
43033"/// A 128-bit vector of [4 x float] containing the left source operand.\n"
43034"/// \\param __b\n"
43035"/// A 128-bit vector of [4 x float] containing the right source operand.\n"
43036"/// \\returns A 128-bit vector of [4 x float] containing the alternating sums and\n"
43037"/// differences of both operands.\n"
43038"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
43039"_mm_addsub_ps(__m128 __a, __m128 __b)\n"
43040"{\n"
43041" return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);\n"
43042"}\n"
43043"\n"
43044"/// Horizontally adds the adjacent pairs of values contained in two\n"
43045"/// 128-bit vectors of [4 x float].\n"
43046"///\n"
43047"/// \\headerfile <x86intrin.h>\n"
43048"///\n"
43049"/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n"
43050"///\n"
43051"/// \\param __a\n"
43052"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
43053"/// The horizontal sums of the values are stored in the lower bits of the\n"
43054"/// destination.\n"
43055"/// \\param __b\n"
43056"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
43057"/// The horizontal sums of the values are stored in the upper bits of the\n"
43058"/// destination.\n"
43059"/// \\returns A 128-bit vector of [4 x float] containing the horizontal sums of\n"
43060"/// both operands.\n"
43061"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
43062"_mm_hadd_ps(__m128 __a, __m128 __b)\n"
43063"{\n"
43064" return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);\n"
43065"}\n"
43066"\n"
43067"/// Horizontally subtracts the adjacent pairs of values contained in two\n"
43068"/// 128-bit vectors of [4 x float].\n"
43069"///\n"
43070"/// \\headerfile <x86intrin.h>\n"
43071"///\n"
43072"/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n"
43073"///\n"
43074"/// \\param __a\n"
43075"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
43076"/// The horizontal differences between the values are stored in the lower\n"
43077"/// bits of the destination.\n"
43078"/// \\param __b\n"
43079"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
43080"/// The horizontal differences between the values are stored in the upper\n"
43081"/// bits of the destination.\n"
43082"/// \\returns A 128-bit vector of [4 x float] containing the horizontal\n"
43083"/// differences of both operands.\n"
43084"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
43085"_mm_hsub_ps(__m128 __a, __m128 __b)\n"
43086"{\n"
43087" return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);\n"
43088"}\n"
43089"\n"
43090"/// Moves and duplicates odd-indexed values from a 128-bit vector\n"
43091"/// of [4 x float] to float values stored in a 128-bit vector of\n"
43092"/// [4 x float].\n"
43093"///\n"
43094"/// \\headerfile <x86intrin.h>\n"
43095"///\n"
43096"/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n"
43097"///\n"
43098"/// \\param __a\n"
43099"/// A 128-bit vector of [4 x float]. \\n\n"
43100"/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of\n"
43101"/// the destination. \\n\n"
43102"/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the\n"
43103"/// destination.\n"
43104"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
43105"/// values.\n"
43106"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
43107"_mm_movehdup_ps(__m128 __a)\n"
43108"{\n"
43109" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);\n"
43110"}\n"
43111"\n"
43112"/// Duplicates even-indexed values from a 128-bit vector of\n"
43113"/// [4 x float] to float values stored in a 128-bit vector of [4 x float].\n"
43114"///\n"
43115"/// \\headerfile <x86intrin.h>\n"
43116"///\n"
43117"/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n"
43118"///\n"
43119"/// \\param __a\n"
43120"/// A 128-bit vector of [4 x float] \\n\n"
43121"/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of\n"
43122"/// the destination. \\n\n"
43123"/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the\n"
43124"/// destination.\n"
43125"/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n"
43126"/// values.\n"
43127"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
43128"_mm_moveldup_ps(__m128 __a)\n"
43129"{\n"
43130" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);\n"
43131"}\n"
43132"\n"
43133"/// Adds the even-indexed values and subtracts the odd-indexed values of\n"
43134"/// two 128-bit vectors of [2 x double].\n"
43135"///\n"
43136"/// \\headerfile <x86intrin.h>\n"
43137"///\n"
43138"/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n"
43139"///\n"
43140"/// \\param __a\n"
43141"/// A 128-bit vector of [2 x double] containing the left source operand.\n"
43142"/// \\param __b\n"
43143"/// A 128-bit vector of [2 x double] containing the right source operand.\n"
43144"/// \\returns A 128-bit vector of [2 x double] containing the alternating sums\n"
43145"/// and differences of both operands.\n"
43146"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
43147"_mm_addsub_pd(__m128d __a, __m128d __b)\n"
43148"{\n"
43149" return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);\n"
43150"}\n"
43151"\n"
43152"/// Horizontally adds the pairs of values contained in two 128-bit\n"
43153"/// vectors of [2 x double].\n"
43154"///\n"
43155"/// \\headerfile <x86intrin.h>\n"
43156"///\n"
43157"/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n"
43158"///\n"
43159"/// \\param __a\n"
43160"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
43161"/// The horizontal sum of the values is stored in the lower bits of the\n"
43162"/// destination.\n"
43163"/// \\param __b\n"
43164"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
43165"/// The horizontal sum of the values is stored in the upper bits of the\n"
43166"/// destination.\n"
43167"/// \\returns A 128-bit vector of [2 x double] containing the horizontal sums of\n"
43168"/// both operands.\n"
43169"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
43170"_mm_hadd_pd(__m128d __a, __m128d __b)\n"
43171"{\n"
43172" return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);\n"
43173"}\n"
43174"\n"
43175"/// Horizontally subtracts the pairs of values contained in two 128-bit\n"
43176"/// vectors of [2 x double].\n"
43177"///\n"
43178"/// \\headerfile <x86intrin.h>\n"
43179"///\n"
43180"/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n"
43181"///\n"
43182"/// \\param __a\n"
43183"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
43184"/// The horizontal difference of the values is stored in the lower bits of\n"
43185"/// the destination.\n"
43186"/// \\param __b\n"
43187"/// A 128-bit vector of [2 x double] containing one of the source operands.\n"
43188"/// The horizontal difference of the values is stored in the upper bits of\n"
43189"/// the destination.\n"
43190"/// \\returns A 128-bit vector of [2 x double] containing the horizontal\n"
43191"/// differences of both operands.\n"
43192"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
43193"_mm_hsub_pd(__m128d __a, __m128d __b)\n"
43194"{\n"
43195" return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);\n"
43196"}\n"
43197"\n"
43198"/// Moves and duplicates one double-precision value to double-precision\n"
43199"/// values stored in a 128-bit vector of [2 x double].\n"
43200"///\n"
43201"/// \\headerfile <x86intrin.h>\n"
43202"///\n"
43203"/// \\code\n"
43204"/// __m128d _mm_loaddup_pd(double const *dp);\n"
43205"/// \\endcode\n"
43206"///\n"
43207"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
43208"///\n"
43209"/// \\param dp\n"
43210"/// A pointer to a double-precision value to be moved and duplicated.\n"
43211"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
43212"/// duplicated values.\n"
43213"#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)\n"
43214"\n"
43215"/// Moves and duplicates the double-precision value in the lower bits of\n"
43216"/// a 128-bit vector of [2 x double] to double-precision values stored in a\n"
43217"/// 128-bit vector of [2 x double].\n"
43218"///\n"
43219"/// \\headerfile <x86intrin.h>\n"
43220"///\n"
43221"/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n"
43222"///\n"
43223"/// \\param __a\n"
43224"/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits\n"
43225"/// [127:64] and [63:0] of the destination.\n"
43226"/// \\returns A 128-bit vector of [2 x double] containing the moved and\n"
43227"/// duplicated values.\n"
43228"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
43229"_mm_movedup_pd(__m128d __a)\n"
43230"{\n"
43231" return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n"
43232"}\n"
43233"\n"
43234"/// Establishes a linear address memory range to be monitored and puts\n"
43235"/// the processor in the monitor event pending state. Data stored in the\n"
43236"/// monitored address range causes the processor to exit the pending state.\n"
43237"///\n"
43238"/// \\headerfile <x86intrin.h>\n"
43239"///\n"
43240"/// This intrinsic corresponds to the <c> MONITOR </c> instruction.\n"
43241"///\n"
43242"/// \\param __p\n"
43243"/// The memory range to be monitored. The size of the range is determined by\n"
43244"/// CPUID function 0000_0005h.\n"
43245"/// \\param __extensions\n"
43246"/// Optional extensions for the monitoring state.\n"
43247"/// \\param __hints\n"
43248"/// Optional hints for the monitoring state.\n"
43249"static __inline__ void __DEFAULT_FN_ATTRS\n"
43250"_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)\n"
43251"{\n"
43252" __builtin_ia32_monitor((void *)__p, __extensions, __hints);\n"
43253"}\n"
43254"\n"
43255"/// Used with the MONITOR instruction to wait while the processor is in\n"
43256"/// the monitor event pending state. Data stored in the monitored address\n"
43257"/// range causes the processor to exit the pending state.\n"
43258"///\n"
43259"/// \\headerfile <x86intrin.h>\n"
43260"///\n"
43261"/// This intrinsic corresponds to the <c> MWAIT </c> instruction.\n"
43262"///\n"
43263"/// \\param __extensions\n"
43264"/// Optional extensions for the monitoring state, which may vary by\n"
43265"/// processor.\n"
43266"/// \\param __hints\n"
43267"/// Optional hints for the monitoring state, which may vary by processor.\n"
43268"static __inline__ void __DEFAULT_FN_ATTRS\n"
43269"_mm_mwait(unsigned __extensions, unsigned __hints)\n"
43270"{\n"
43271" __builtin_ia32_mwait(__extensions, __hints);\n"
43272"}\n"
43273"\n"
43274"#undef __DEFAULT_FN_ATTRS\n"
43275"\n"
43276"#endif /* __PMMINTRIN_H */\n"
43277"" } ,
43278 { "/builtins/popcntintrin.h" , "/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===\n"
43279" *\n"
43280" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43281" * of this software and associated documentation files (the \"Software\"), to deal\n"
43282" * in the Software without restriction, including without limitation the rights\n"
43283" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43284" * copies of the Software, and to permit persons to whom the Software is\n"
43285" * furnished to do so, subject to the following conditions:\n"
43286" *\n"
43287" * The above copyright notice and this permission notice shall be included in\n"
43288" * all copies or substantial portions of the Software.\n"
43289" *\n"
43290" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43291" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43292" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43293" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43294" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43295" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43296" * THE SOFTWARE.\n"
43297" *\n"
43298" *===-----------------------------------------------------------------------===\n"
43299" */\n"
43300"\n"
43301"#ifndef __POPCNTINTRIN_H\n"
43302"#define __POPCNTINTRIN_H\n"
43303"\n"
43304"/* Define the default attributes for the functions in this file. */\n"
43305"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"popcnt\")))\n"
43306"\n"
43307"/// Counts the number of bits in the source operand having a value of 1.\n"
43308"///\n"
43309"/// \\headerfile <x86intrin.h>\n"
43310"///\n"
43311"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
43312"///\n"
43313"/// \\param __A\n"
43314"/// An unsigned 32-bit integer operand.\n"
43315"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
43316"/// source operand.\n"
43317"static __inline__ int __DEFAULT_FN_ATTRS\n"
43318"_mm_popcnt_u32(unsigned int __A)\n"
43319"{\n"
43320" return __builtin_popcount(__A);\n"
43321"}\n"
43322"\n"
43323"/// Counts the number of bits in the source operand having a value of 1.\n"
43324"///\n"
43325"/// \\headerfile <x86intrin.h>\n"
43326"///\n"
43327"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
43328"///\n"
43329"/// \\param __A\n"
43330"/// A signed 32-bit integer operand.\n"
43331"/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n"
43332"/// source operand.\n"
43333"static __inline__ int __DEFAULT_FN_ATTRS\n"
43334"_popcnt32(int __A)\n"
43335"{\n"
43336" return __builtin_popcount(__A);\n"
43337"}\n"
43338"\n"
43339"#ifdef __x86_64__\n"
43340"/// Counts the number of bits in the source operand having a value of 1.\n"
43341"///\n"
43342"/// \\headerfile <x86intrin.h>\n"
43343"///\n"
43344"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
43345"///\n"
43346"/// \\param __A\n"
43347"/// An unsigned 64-bit integer operand.\n"
43348"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
43349"/// source operand.\n"
43350"static __inline__ long long __DEFAULT_FN_ATTRS\n"
43351"_mm_popcnt_u64(unsigned long long __A)\n"
43352"{\n"
43353" return __builtin_popcountll(__A);\n"
43354"}\n"
43355"\n"
43356"/// Counts the number of bits in the source operand having a value of 1.\n"
43357"///\n"
43358"/// \\headerfile <x86intrin.h>\n"
43359"///\n"
43360"/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n"
43361"///\n"
43362"/// \\param __A\n"
43363"/// A signed 64-bit integer operand.\n"
43364"/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n"
43365"/// source operand.\n"
43366"static __inline__ long long __DEFAULT_FN_ATTRS\n"
43367"_popcnt64(long long __A)\n"
43368"{\n"
43369" return __builtin_popcountll(__A);\n"
43370"}\n"
43371"#endif /* __x86_64__ */\n"
43372"\n"
43373"#undef __DEFAULT_FN_ATTRS\n"
43374"\n"
43375"#endif /* __POPCNTINTRIN_H */\n"
43376"" } ,
43377 { "/builtins/prfchwintrin.h" , "/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===\n"
43378" *\n"
43379" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43380" * of this software and associated documentation files (the \"Software\"), to deal\n"
43381" * in the Software without restriction, including without limitation the rights\n"
43382" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43383" * copies of the Software, and to permit persons to whom the Software is\n"
43384" * furnished to do so, subject to the following conditions:\n"
43385" *\n"
43386" * The above copyright notice and this permission notice shall be included in\n"
43387" * all copies or substantial portions of the Software.\n"
43388" *\n"
43389" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43390" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43391" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43392" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43393" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43394" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43395" * THE SOFTWARE.\n"
43396" *\n"
43397" *===-----------------------------------------------------------------------===\n"
43398" */\n"
43399"\n"
43400"#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)\n"
43401"#error \"Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead.\"\n"
43402"#endif\n"
43403"\n"
43404"#ifndef __PRFCHWINTRIN_H\n"
43405"#define __PRFCHWINTRIN_H\n"
43406"\n"
43407"/// Loads a memory sequence containing the specified memory address into\n"
43408"/// all data cache levels. The cache-coherency state is set to exclusive.\n"
43409"/// Data can be read from and written to the cache line without additional\n"
43410"/// delay.\n"
43411"///\n"
43412"/// \\headerfile <x86intrin.h>\n"
43413"///\n"
43414"/// This intrinsic corresponds to the \\c PREFETCHT0 instruction.\n"
43415"///\n"
43416"/// \\param __P\n"
43417"/// A pointer specifying the memory address to be prefetched.\n"
43418"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
43419"_m_prefetch(void *__P)\n"
43420"{\n"
43421" __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);\n"
43422"}\n"
43423"\n"
43424"/// Loads a memory sequence containing the specified memory address into\n"
43425"/// the L1 data cache and sets the cache-coherency to modified. This\n"
43426"/// provides a hint to the processor that the cache line will be modified.\n"
43427"/// It is intended for use when the cache line will be written to shortly\n"
43428"/// after the prefetch is performed.\n"
43429"///\n"
43430"/// Note that the effect of this intrinsic is dependent on the processor\n"
43431"/// implementation.\n"
43432"///\n"
43433"/// \\headerfile <x86intrin.h>\n"
43434"///\n"
43435"/// This intrinsic corresponds to the \\c PREFETCHW instruction.\n"
43436"///\n"
43437"/// \\param __P\n"
43438"/// A pointer specifying the memory address to be prefetched.\n"
43439"static __inline__ void __attribute__((__always_inline__, __nodebug__))\n"
43440"_m_prefetchw(void *__P)\n"
43441"{\n"
43442" __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);\n"
43443"}\n"
43444"\n"
43445"#endif /* __PRFCHWINTRIN_H */\n"
43446"" } ,
43447 { "/builtins/ptwriteintrin.h" , "/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===\n"
43448" *\n"
43449" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43450" * of this software and associated documentation files (the \"Software\"), to deal\n"
43451" * in the Software without restriction, including without limitation the rights\n"
43452" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43453" * copies of the Software, and to permit persons to whom the Software is\n"
43454" * furnished to do so, subject to the following conditions:\n"
43455" *\n"
43456" * The above copyright notice and this permission notice shall be included in\n"
43457" * all copies or substantial portions of the Software.\n"
43458" *\n"
43459" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43460" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43461" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43462" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43463" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43464" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43465" * THE SOFTWARE.\n"
43466" *\n"
43467" *===-----------------------------------------------------------------------===\n"
43468" */\n"
43469"\n"
43470"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
43471"#error \"Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead.\"\n"
43472"#endif\n"
43473"\n"
43474"#ifndef __PTWRITEINTRIN_H\n"
43475"#define __PTWRITEINTRIN_H\n"
43476"\n"
43477"/* Define the default attributes for the functions in this file. */\n"
43478"#define __DEFAULT_FN_ATTRS \\\n"
43479" __attribute__((__always_inline__, __nodebug__, __target__(\"ptwrite\")))\n"
43480"\n"
43481"static __inline__ void __DEFAULT_FN_ATTRS\n"
43482"_ptwrite32(unsigned int __value) {\n"
43483" __builtin_ia32_ptwrite32(__value);\n"
43484"}\n"
43485"\n"
43486"#ifdef __x86_64__\n"
43487"\n"
43488"static __inline__ void __DEFAULT_FN_ATTRS\n"
43489"_ptwrite64(unsigned long long __value) {\n"
43490" __builtin_ia32_ptwrite64(__value);\n"
43491"}\n"
43492"\n"
43493"#endif /* __x86_64__ */\n"
43494"\n"
43495"#undef __DEFAULT_FN_ATTRS\n"
43496"\n"
43497"#endif /* __PTWRITEINTRIN_H */\n"
43498"" } ,
43499 { "/builtins/rdseedintrin.h" , "/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===\n"
43500" *\n"
43501" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43502" * of this software and associated documentation files (the \"Software\"), to deal\n"
43503" * in the Software without restriction, including without limitation the rights\n"
43504" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43505" * copies of the Software, and to permit persons to whom the Software is\n"
43506" * furnished to do so, subject to the following conditions:\n"
43507" *\n"
43508" * The above copyright notice and this permission notice shall be included in\n"
43509" * all copies or substantial portions of the Software.\n"
43510" *\n"
43511" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43512" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43513" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43514" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43515" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43516" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43517" * THE SOFTWARE.\n"
43518" *\n"
43519" *===-----------------------------------------------------------------------===\n"
43520" */\n"
43521"\n"
43522"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
43523"#error \"Never use <rdseedintrin.h> directly; include <x86intrin.h> instead.\"\n"
43524"#endif\n"
43525"\n"
43526"#ifndef __RDSEEDINTRIN_H\n"
43527"#define __RDSEEDINTRIN_H\n"
43528"\n"
43529"/* Define the default attributes for the functions in this file. */\n"
43530"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rdseed\")))\n"
43531"\n"
43532"static __inline__ int __DEFAULT_FN_ATTRS\n"
43533"_rdseed16_step(unsigned short *__p)\n"
43534"{\n"
43535" return __builtin_ia32_rdseed16_step(__p);\n"
43536"}\n"
43537"\n"
43538"static __inline__ int __DEFAULT_FN_ATTRS\n"
43539"_rdseed32_step(unsigned int *__p)\n"
43540"{\n"
43541" return __builtin_ia32_rdseed32_step(__p);\n"
43542"}\n"
43543"\n"
43544"#ifdef __x86_64__\n"
43545"static __inline__ int __DEFAULT_FN_ATTRS\n"
43546"_rdseed64_step(unsigned long long *__p)\n"
43547"{\n"
43548" return __builtin_ia32_rdseed64_step(__p);\n"
43549"}\n"
43550"#endif\n"
43551"\n"
43552"#undef __DEFAULT_FN_ATTRS\n"
43553"\n"
43554"#endif /* __RDSEEDINTRIN_H */\n"
43555"" } ,
43556 { "/builtins/rtmintrin.h" , "/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===\n"
43557" *\n"
43558" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43559" * of this software and associated documentation files (the \"Software\"), to deal\n"
43560" * in the Software without restriction, including without limitation the rights\n"
43561" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43562" * copies of the Software, and to permit persons to whom the Software is\n"
43563" * furnished to do so, subject to the following conditions:\n"
43564" *\n"
43565" * The above copyright notice and this permission notice shall be included in\n"
43566" * all copies or substantial portions of the Software.\n"
43567" *\n"
43568" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43569" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43570" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43571" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43572" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43573" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43574" * THE SOFTWARE.\n"
43575" *\n"
43576" *===-----------------------------------------------------------------------===\n"
43577" */\n"
43578"\n"
43579"#ifndef __IMMINTRIN_H\n"
43580"#error \"Never use <rtmintrin.h> directly; include <immintrin.h> instead.\"\n"
43581"#endif\n"
43582"\n"
43583"#ifndef __RTMINTRIN_H\n"
43584"#define __RTMINTRIN_H\n"
43585"\n"
43586"#define _XBEGIN_STARTED (~0u)\n"
43587"#define _XABORT_EXPLICIT (1 << 0)\n"
43588"#define _XABORT_RETRY (1 << 1)\n"
43589"#define _XABORT_CONFLICT (1 << 2)\n"
43590"#define _XABORT_CAPACITY (1 << 3)\n"
43591"#define _XABORT_DEBUG (1 << 4)\n"
43592"#define _XABORT_NESTED (1 << 5)\n"
43593"#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)\n"
43594"\n"
43595"/* Define the default attributes for the functions in this file. */\n"
43596"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
43597"\n"
43598"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
43599"_xbegin(void)\n"
43600"{\n"
43601" return __builtin_ia32_xbegin();\n"
43602"}\n"
43603"\n"
43604"static __inline__ void __DEFAULT_FN_ATTRS\n"
43605"_xend(void)\n"
43606"{\n"
43607" __builtin_ia32_xend();\n"
43608"}\n"
43609"\n"
43610"#define _xabort(imm) __builtin_ia32_xabort((imm))\n"
43611"\n"
43612"#undef __DEFAULT_FN_ATTRS\n"
43613"\n"
43614"#endif /* __RTMINTRIN_H */\n"
43615"" } ,
43616 { "/builtins/s390intrin.h" , "/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===\n"
43617" *\n"
43618" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43619" * of this software and associated documentation files (the \"Software\"), to deal\n"
43620" * in the Software without restriction, including without limitation the rights\n"
43621" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43622" * copies of the Software, and to permit persons to whom the Software is\n"
43623" * furnished to do so, subject to the following conditions:\n"
43624" *\n"
43625" * The above copyright notice and this permission notice shall be included in\n"
43626" * all copies or substantial portions of the Software.\n"
43627" *\n"
43628" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43629" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43630" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43631" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43632" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43633" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43634" * THE SOFTWARE.\n"
43635" *\n"
43636" *===-----------------------------------------------------------------------===\n"
43637" */\n"
43638"\n"
43639"#ifndef __S390INTRIN_H\n"
43640"#define __S390INTRIN_H\n"
43641"\n"
43642"#ifndef __s390__\n"
43643"#error \"<s390intrin.h> is for s390 only\"\n"
43644"#endif\n"
43645"\n"
43646"#ifdef __HTM__\n"
43647"#include <htmintrin.h>\n"
43648"#endif\n"
43649"\n"
43650"#ifdef __VEC__\n"
43651"#include <vecintrin.h>\n"
43652"#endif\n"
43653"\n"
43654"#endif /* __S390INTRIN_H*/\n"
43655"" } ,
43656 { "/builtins/sgxintrin.h" , "/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===\n"
43657" *\n"
43658" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43659" * of this software and associated documentation files (the \"Software\"), to deal\n"
43660" * in the Software without restriction, including without limitation the rights\n"
43661" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43662" * copies of the Software, and to permit persons to whom the Software is\n"
43663" * furnished to do so, subject to the following conditions:\n"
43664" *\n"
43665" * The above copyright notice and this permission notice shall be included in\n"
43666" * all copies or substantial portions of the Software.\n"
43667" *\n"
43668" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43669" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43670" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43671" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43672" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43673" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43674" * THE SOFTWARE.\n"
43675" *\n"
43676" *===-----------------------------------------------------------------------===\n"
43677" */\n"
43678"\n"
43679"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
43680"#error \"Never use <sgxintrin.h> directly; include <x86intrin.h> instead.\"\n"
43681"#endif\n"
43682"\n"
43683"#ifndef __SGXINTRIN_H\n"
43684"#define __SGXINTRIN_H\n"
43685"\n"
43686"/* Define the default attributes for the functions in this file. */\n"
43687"#define __DEFAULT_FN_ATTRS \\\n"
43688" __attribute__((__always_inline__, __nodebug__, __target__(\"sgx\")))\n"
43689"\n"
43690"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
43691"_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
43692"{\n"
43693" unsigned int __result;\n"
43694" __asm__ (\"enclu\"\n"
43695" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
43696" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
43697" : \"cc\");\n"
43698" return __result;\n"
43699"}\n"
43700"\n"
43701"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
43702"_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
43703"{\n"
43704" unsigned int __result;\n"
43705" __asm__ (\"encls\"\n"
43706" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
43707" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
43708" : \"cc\");\n"
43709" return __result;\n"
43710"}\n"
43711"\n"
43712"static __inline unsigned int __DEFAULT_FN_ATTRS\n"
43713"_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n"
43714"{\n"
43715" unsigned int __result;\n"
43716" __asm__ (\"enclv\"\n"
43717" : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n"
43718" : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n"
43719" : \"cc\");\n"
43720" return __result;\n"
43721"}\n"
43722"\n"
43723"#undef __DEFAULT_FN_ATTRS\n"
43724"\n"
43725"#endif\n"
43726"" } ,
43727 { "/builtins/shaintrin.h" , "/*===---- shaintrin.h - SHA intrinsics -------------------------------------===\n"
43728" *\n"
43729" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43730" * of this software and associated documentation files (the \"Software\"), to deal\n"
43731" * in the Software without restriction, including without limitation the rights\n"
43732" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43733" * copies of the Software, and to permit persons to whom the Software is\n"
43734" * furnished to do so, subject to the following conditions:\n"
43735" *\n"
43736" * The above copyright notice and this permission notice shall be included in\n"
43737" * all copies or substantial portions of the Software.\n"
43738" *\n"
43739" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43740" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43741" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43742" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43743" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43744" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43745" * THE SOFTWARE.\n"
43746" *\n"
43747" *===-----------------------------------------------------------------------===\n"
43748" */\n"
43749"\n"
43750"#ifndef __IMMINTRIN_H\n"
43751"#error \"Never use <shaintrin.h> directly; include <immintrin.h> instead.\"\n"
43752"#endif\n"
43753"\n"
43754"#ifndef __SHAINTRIN_H\n"
43755"#define __SHAINTRIN_H\n"
43756"\n"
43757"/* Define the default attributes for the functions in this file. */\n"
43758"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sha\"), __min_vector_width__(128)))\n"
43759"\n"
43760"#define _mm_sha1rnds4_epu32(V1, V2, M) \\\n"
43761" __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))\n"
43762"\n"
43763"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43764"_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)\n"
43765"{\n"
43766" return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);\n"
43767"}\n"
43768"\n"
43769"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43770"_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)\n"
43771"{\n"
43772" return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);\n"
43773"}\n"
43774"\n"
43775"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43776"_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)\n"
43777"{\n"
43778" return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);\n"
43779"}\n"
43780"\n"
43781"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43782"_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)\n"
43783"{\n"
43784" return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);\n"
43785"}\n"
43786"\n"
43787"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43788"_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)\n"
43789"{\n"
43790" return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);\n"
43791"}\n"
43792"\n"
43793"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
43794"_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)\n"
43795"{\n"
43796" return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);\n"
43797"}\n"
43798"\n"
43799"#undef __DEFAULT_FN_ATTRS\n"
43800"\n"
43801"#endif /* __SHAINTRIN_H */\n"
43802"" } ,
43803 { "/builtins/smmintrin.h" , "/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===\n"
43804" *\n"
43805" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
43806" * of this software and associated documentation files (the \"Software\"), to deal\n"
43807" * in the Software without restriction, including without limitation the rights\n"
43808" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
43809" * copies of the Software, and to permit persons to whom the Software is\n"
43810" * furnished to do so, subject to the following conditions:\n"
43811" *\n"
43812" * The above copyright notice and this permission notice shall be included in\n"
43813" * all copies or substantial portions of the Software.\n"
43814" *\n"
43815" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
43816" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
43817" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
43818" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
43819" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
43820" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
43821" * THE SOFTWARE.\n"
43822" *\n"
43823" *===-----------------------------------------------------------------------===\n"
43824" */\n"
43825"\n"
43826"#ifndef __SMMINTRIN_H\n"
43827"#define __SMMINTRIN_H\n"
43828"\n"
43829"#include <tmmintrin.h>\n"
43830"\n"
43831"/* Define the default attributes for the functions in this file. */\n"
43832"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.1\"), __min_vector_width__(128)))\n"
43833"\n"
43834"/* SSE4 Rounding macros. */\n"
43835"#define _MM_FROUND_TO_NEAREST_INT 0x00\n"
43836"#define _MM_FROUND_TO_NEG_INF 0x01\n"
43837"#define _MM_FROUND_TO_POS_INF 0x02\n"
43838"#define _MM_FROUND_TO_ZERO 0x03\n"
43839"#define _MM_FROUND_CUR_DIRECTION 0x04\n"
43840"\n"
43841"#define _MM_FROUND_RAISE_EXC 0x00\n"
43842"#define _MM_FROUND_NO_EXC 0x08\n"
43843"\n"
43844"#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)\n"
43845"#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)\n"
43846"#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)\n"
43847"#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)\n"
43848"#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)\n"
43849"#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)\n"
43850"\n"
43851"/// Rounds up each element of the 128-bit vector of [4 x float] to an\n"
43852"/// integer and returns the rounded values in a 128-bit vector of\n"
43853"/// [4 x float].\n"
43854"///\n"
43855"/// \\headerfile <x86intrin.h>\n"
43856"///\n"
43857"/// \\code\n"
43858"/// __m128 _mm_ceil_ps(__m128 X);\n"
43859"/// \\endcode\n"
43860"///\n"
43861"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
43862"///\n"
43863"/// \\param X\n"
43864"/// A 128-bit vector of [4 x float] values to be rounded up.\n"
43865"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
43866"#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)\n"
43867"\n"
43868"/// Rounds up each element of the 128-bit vector of [2 x double] to an\n"
43869"/// integer and returns the rounded values in a 128-bit vector of\n"
43870"/// [2 x double].\n"
43871"///\n"
43872"/// \\headerfile <x86intrin.h>\n"
43873"///\n"
43874"/// \\code\n"
43875"/// __m128d _mm_ceil_pd(__m128d X);\n"
43876"/// \\endcode\n"
43877"///\n"
43878"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
43879"///\n"
43880"/// \\param X\n"
43881"/// A 128-bit vector of [2 x double] values to be rounded up.\n"
43882"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
43883"#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)\n"
43884"\n"
43885"/// Copies three upper elements of the first 128-bit vector operand to\n"
43886"/// the corresponding three upper elements of the 128-bit result vector of\n"
43887"/// [4 x float]. Rounds up the lowest element of the second 128-bit vector\n"
43888"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
43889"/// result vector of [4 x float].\n"
43890"///\n"
43891"/// \\headerfile <x86intrin.h>\n"
43892"///\n"
43893"/// \\code\n"
43894"/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);\n"
43895"/// \\endcode\n"
43896"///\n"
43897"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
43898"///\n"
43899"/// \\param X\n"
43900"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
43901"/// copied to the corresponding bits of the result.\n"
43902"/// \\param Y\n"
43903"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
43904"/// rounded up to the nearest integer and copied to the corresponding bits\n"
43905"/// of the result.\n"
43906"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
43907"/// values.\n"
43908"#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)\n"
43909"\n"
43910"/// Copies the upper element of the first 128-bit vector operand to the\n"
43911"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
43912"/// Rounds up the lower element of the second 128-bit vector operand to an\n"
43913"/// integer and copies it to the lower element of the 128-bit result vector\n"
43914"/// of [2 x double].\n"
43915"///\n"
43916"/// \\headerfile <x86intrin.h>\n"
43917"///\n"
43918"/// \\code\n"
43919"/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);\n"
43920"/// \\endcode\n"
43921"///\n"
43922"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
43923"///\n"
43924"/// \\param X\n"
43925"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
43926"/// copied to the corresponding bits of the result.\n"
43927"/// \\param Y\n"
43928"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
43929"/// rounded up to the nearest integer and copied to the corresponding bits\n"
43930"/// of the result.\n"
43931"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
43932"/// values.\n"
43933"#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)\n"
43934"\n"
43935"/// Rounds down each element of the 128-bit vector of [4 x float] to an\n"
43936"/// an integer and returns the rounded values in a 128-bit vector of\n"
43937"/// [4 x float].\n"
43938"///\n"
43939"/// \\headerfile <x86intrin.h>\n"
43940"///\n"
43941"/// \\code\n"
43942"/// __m128 _mm_floor_ps(__m128 X);\n"
43943"/// \\endcode\n"
43944"///\n"
43945"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
43946"///\n"
43947"/// \\param X\n"
43948"/// A 128-bit vector of [4 x float] values to be rounded down.\n"
43949"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
43950"#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)\n"
43951"\n"
43952"/// Rounds down each element of the 128-bit vector of [2 x double] to an\n"
43953"/// integer and returns the rounded values in a 128-bit vector of\n"
43954"/// [2 x double].\n"
43955"///\n"
43956"/// \\headerfile <x86intrin.h>\n"
43957"///\n"
43958"/// \\code\n"
43959"/// __m128d _mm_floor_pd(__m128d X);\n"
43960"/// \\endcode\n"
43961"///\n"
43962"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
43963"///\n"
43964"/// \\param X\n"
43965"/// A 128-bit vector of [2 x double].\n"
43966"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
43967"#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)\n"
43968"\n"
43969"/// Copies three upper elements of the first 128-bit vector operand to\n"
43970"/// the corresponding three upper elements of the 128-bit result vector of\n"
43971"/// [4 x float]. Rounds down the lowest element of the second 128-bit vector\n"
43972"/// operand to an integer and copies it to the lowest element of the 128-bit\n"
43973"/// result vector of [4 x float].\n"
43974"///\n"
43975"/// \\headerfile <x86intrin.h>\n"
43976"///\n"
43977"/// \\code\n"
43978"/// __m128 _mm_floor_ss(__m128 X, __m128 Y);\n"
43979"/// \\endcode\n"
43980"///\n"
43981"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
43982"///\n"
43983"/// \\param X\n"
43984"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
43985"/// copied to the corresponding bits of the result.\n"
43986"/// \\param Y\n"
43987"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
43988"/// rounded down to the nearest integer and copied to the corresponding bits\n"
43989"/// of the result.\n"
43990"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
43991"/// values.\n"
43992"#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)\n"
43993"\n"
43994"/// Copies the upper element of the first 128-bit vector operand to the\n"
43995"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
43996"/// Rounds down the lower element of the second 128-bit vector operand to an\n"
43997"/// integer and copies it to the lower element of the 128-bit result vector\n"
43998"/// of [2 x double].\n"
43999"///\n"
44000"/// \\headerfile <x86intrin.h>\n"
44001"///\n"
44002"/// \\code\n"
44003"/// __m128d _mm_floor_sd(__m128d X, __m128d Y);\n"
44004"/// \\endcode\n"
44005"///\n"
44006"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
44007"///\n"
44008"/// \\param X\n"
44009"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
44010"/// copied to the corresponding bits of the result.\n"
44011"/// \\param Y\n"
44012"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
44013"/// rounded down to the nearest integer and copied to the corresponding bits\n"
44014"/// of the result.\n"
44015"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
44016"/// values.\n"
44017"#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)\n"
44018"\n"
44019"/// Rounds each element of the 128-bit vector of [4 x float] to an\n"
44020"/// integer value according to the rounding control specified by the second\n"
44021"/// argument and returns the rounded values in a 128-bit vector of\n"
44022"/// [4 x float].\n"
44023"///\n"
44024"/// \\headerfile <x86intrin.h>\n"
44025"///\n"
44026"/// \\code\n"
44027"/// __m128 _mm_round_ps(__m128 X, const int M);\n"
44028"/// \\endcode\n"
44029"///\n"
44030"/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n"
44031"///\n"
44032"/// \\param X\n"
44033"/// A 128-bit vector of [4 x float].\n"
44034"/// \\param M\n"
44035"/// An integer value that specifies the rounding operation. \\n\n"
44036"/// Bits [7:4] are reserved. \\n\n"
44037"/// Bit [3] is a precision exception value: \\n\n"
44038"/// 0: A normal PE exception is used \\n\n"
44039"/// 1: The PE field is not updated \\n\n"
44040"/// Bit [2] is the rounding control source: \\n\n"
44041"/// 0: Use bits [1:0] of \\a M \\n\n"
44042"/// 1: Use the current MXCSR setting \\n\n"
44043"/// Bits [1:0] contain the rounding control definition: \\n\n"
44044"/// 00: Nearest \\n\n"
44045"/// 01: Downward (toward negative infinity) \\n\n"
44046"/// 10: Upward (toward positive infinity) \\n\n"
44047"/// 11: Truncated\n"
44048"/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n"
44049"#define _mm_round_ps(X, M) \\\n"
44050" (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))\n"
44051"\n"
44052"/// Copies three upper elements of the first 128-bit vector operand to\n"
44053"/// the corresponding three upper elements of the 128-bit result vector of\n"
44054"/// [4 x float]. Rounds the lowest element of the second 128-bit vector\n"
44055"/// operand to an integer value according to the rounding control specified\n"
44056"/// by the third argument and copies it to the lowest element of the 128-bit\n"
44057"/// result vector of [4 x float].\n"
44058"///\n"
44059"/// \\headerfile <x86intrin.h>\n"
44060"///\n"
44061"/// \\code\n"
44062"/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);\n"
44063"/// \\endcode\n"
44064"///\n"
44065"/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n"
44066"///\n"
44067"/// \\param X\n"
44068"/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n"
44069"/// copied to the corresponding bits of the result.\n"
44070"/// \\param Y\n"
44071"/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n"
44072"/// rounded to the nearest integer using the specified rounding control and\n"
44073"/// copied to the corresponding bits of the result.\n"
44074"/// \\param M\n"
44075"/// An integer value that specifies the rounding operation. \\n\n"
44076"/// Bits [7:4] are reserved. \\n\n"
44077"/// Bit [3] is a precision exception value: \\n\n"
44078"/// 0: A normal PE exception is used \\n\n"
44079"/// 1: The PE field is not updated \\n\n"
44080"/// Bit [2] is the rounding control source: \\n\n"
44081"/// 0: Use bits [1:0] of \\a M \\n\n"
44082"/// 1: Use the current MXCSR setting \\n\n"
44083"/// Bits [1:0] contain the rounding control definition: \\n\n"
44084"/// 00: Nearest \\n\n"
44085"/// 01: Downward (toward negative infinity) \\n\n"
44086"/// 10: Upward (toward positive infinity) \\n\n"
44087"/// 11: Truncated\n"
44088"/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n"
44089"/// values.\n"
44090"#define _mm_round_ss(X, Y, M) \\\n"
44091" (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \\\n"
44092" (__v4sf)(__m128)(Y), (M))\n"
44093"\n"
44094"/// Rounds each element of the 128-bit vector of [2 x double] to an\n"
44095"/// integer value according to the rounding control specified by the second\n"
44096"/// argument and returns the rounded values in a 128-bit vector of\n"
44097"/// [2 x double].\n"
44098"///\n"
44099"/// \\headerfile <x86intrin.h>\n"
44100"///\n"
44101"/// \\code\n"
44102"/// __m128d _mm_round_pd(__m128d X, const int M);\n"
44103"/// \\endcode\n"
44104"///\n"
44105"/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n"
44106"///\n"
44107"/// \\param X\n"
44108"/// A 128-bit vector of [2 x double].\n"
44109"/// \\param M\n"
44110"/// An integer value that specifies the rounding operation. \\n\n"
44111"/// Bits [7:4] are reserved. \\n\n"
44112"/// Bit [3] is a precision exception value: \\n\n"
44113"/// 0: A normal PE exception is used \\n\n"
44114"/// 1: The PE field is not updated \\n\n"
44115"/// Bit [2] is the rounding control source: \\n\n"
44116"/// 0: Use bits [1:0] of \\a M \\n\n"
44117"/// 1: Use the current MXCSR setting \\n\n"
44118"/// Bits [1:0] contain the rounding control definition: \\n\n"
44119"/// 00: Nearest \\n\n"
44120"/// 01: Downward (toward negative infinity) \\n\n"
44121"/// 10: Upward (toward positive infinity) \\n\n"
44122"/// 11: Truncated\n"
44123"/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n"
44124"#define _mm_round_pd(X, M) \\\n"
44125" (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))\n"
44126"\n"
44127"/// Copies the upper element of the first 128-bit vector operand to the\n"
44128"/// corresponding upper element of the 128-bit result vector of [2 x double].\n"
44129"/// Rounds the lower element of the second 128-bit vector operand to an\n"
44130"/// integer value according to the rounding control specified by the third\n"
44131"/// argument and copies it to the lower element of the 128-bit result vector\n"
44132"/// of [2 x double].\n"
44133"///\n"
44134"/// \\headerfile <x86intrin.h>\n"
44135"///\n"
44136"/// \\code\n"
44137"/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);\n"
44138"/// \\endcode\n"
44139"///\n"
44140"/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n"
44141"///\n"
44142"/// \\param X\n"
44143"/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n"
44144"/// copied to the corresponding bits of the result.\n"
44145"/// \\param Y\n"
44146"/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n"
44147"/// rounded to the nearest integer using the specified rounding control and\n"
44148"/// copied to the corresponding bits of the result.\n"
44149"/// \\param M\n"
44150"/// An integer value that specifies the rounding operation. \\n\n"
44151"/// Bits [7:4] are reserved. \\n\n"
44152"/// Bit [3] is a precision exception value: \\n\n"
44153"/// 0: A normal PE exception is used \\n\n"
44154"/// 1: The PE field is not updated \\n\n"
44155"/// Bit [2] is the rounding control source: \\n\n"
44156"/// 0: Use bits [1:0] of \\a M \\n\n"
44157"/// 1: Use the current MXCSR setting \\n\n"
44158"/// Bits [1:0] contain the rounding control definition: \\n\n"
44159"/// 00: Nearest \\n\n"
44160"/// 01: Downward (toward negative infinity) \\n\n"
44161"/// 10: Upward (toward positive infinity) \\n\n"
44162"/// 11: Truncated\n"
44163"/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n"
44164"/// values.\n"
44165"#define _mm_round_sd(X, Y, M) \\\n"
44166" (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \\\n"
44167" (__v2df)(__m128d)(Y), (M))\n"
44168"\n"
44169"/* SSE4 Packed Blending Intrinsics. */\n"
44170"/// Returns a 128-bit vector of [2 x double] where the values are\n"
44171"/// selected from either the first or second operand as specified by the\n"
44172"/// third operand, the control mask.\n"
44173"///\n"
44174"/// \\headerfile <x86intrin.h>\n"
44175"///\n"
44176"/// \\code\n"
44177"/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);\n"
44178"/// \\endcode\n"
44179"///\n"
44180"/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n"
44181"///\n"
44182"/// \\param V1\n"
44183"/// A 128-bit vector of [2 x double].\n"
44184"/// \\param V2\n"
44185"/// A 128-bit vector of [2 x double].\n"
44186"/// \\param M\n"
44187"/// An immediate integer operand, with mask bits [1:0] specifying how the\n"
44188"/// values are to be copied. The position of the mask bit corresponds to the\n"
44189"/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n"
44190"/// element in operand \\a V1 is copied to the same position in the result.\n"
44191"/// When a mask bit is 1, the corresponding 64-bit element in operand \\a V2\n"
44192"/// is copied to the same position in the result.\n"
44193"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
44194"#define _mm_blend_pd(V1, V2, M) \\\n"
44195" (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \\\n"
44196" (__v2df)(__m128d)(V2), (int)(M))\n"
44197"\n"
44198"/// Returns a 128-bit vector of [4 x float] where the values are selected\n"
44199"/// from either the first or second operand as specified by the third\n"
44200"/// operand, the control mask.\n"
44201"///\n"
44202"/// \\headerfile <x86intrin.h>\n"
44203"///\n"
44204"/// \\code\n"
44205"/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);\n"
44206"/// \\endcode\n"
44207"///\n"
44208"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.\n"
44209"///\n"
44210"/// \\param V1\n"
44211"/// A 128-bit vector of [4 x float].\n"
44212"/// \\param V2\n"
44213"/// A 128-bit vector of [4 x float].\n"
44214"/// \\param M\n"
44215"/// An immediate integer operand, with mask bits [3:0] specifying how the\n"
44216"/// values are to be copied. The position of the mask bit corresponds to the\n"
44217"/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n"
44218"/// element in operand \\a V1 is copied to the same position in the result.\n"
44219"/// When a mask bit is 1, the corresponding 32-bit element in operand \\a V2\n"
44220"/// is copied to the same position in the result.\n"
44221"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
44222"#define _mm_blend_ps(V1, V2, M) \\\n"
44223" (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \\\n"
44224" (__v4sf)(__m128)(V2), (int)(M))\n"
44225"\n"
44226"/// Returns a 128-bit vector of [2 x double] where the values are\n"
44227"/// selected from either the first or second operand as specified by the\n"
44228"/// third operand, the control mask.\n"
44229"///\n"
44230"/// \\headerfile <x86intrin.h>\n"
44231"///\n"
44232"/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.\n"
44233"///\n"
44234"/// \\param __V1\n"
44235"/// A 128-bit vector of [2 x double].\n"
44236"/// \\param __V2\n"
44237"/// A 128-bit vector of [2 x double].\n"
44238"/// \\param __M\n"
44239"/// A 128-bit vector operand, with mask bits 127 and 63 specifying how the\n"
44240"/// values are to be copied. The position of the mask bit corresponds to the\n"
44241"/// most significant bit of a copied value. When a mask bit is 0, the\n"
44242"/// corresponding 64-bit element in operand \\a __V1 is copied to the same\n"
44243"/// position in the result. When a mask bit is 1, the corresponding 64-bit\n"
44244"/// element in operand \\a __V2 is copied to the same position in the result.\n"
44245"/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n"
44246"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
44247"_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)\n"
44248"{\n"
44249" return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,\n"
44250" (__v2df)__M);\n"
44251"}\n"
44252"\n"
44253"/// Returns a 128-bit vector of [4 x float] where the values are\n"
44254"/// selected from either the first or second operand as specified by the\n"
44255"/// third operand, the control mask.\n"
44256"///\n"
44257"/// \\headerfile <x86intrin.h>\n"
44258"///\n"
44259"/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.\n"
44260"///\n"
44261"/// \\param __V1\n"
44262"/// A 128-bit vector of [4 x float].\n"
44263"/// \\param __V2\n"
44264"/// A 128-bit vector of [4 x float].\n"
44265"/// \\param __M\n"
44266"/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying\n"
44267"/// how the values are to be copied. The position of the mask bit corresponds\n"
44268"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
44269"/// corresponding 32-bit element in operand \\a __V1 is copied to the same\n"
44270"/// position in the result. When a mask bit is 1, the corresponding 32-bit\n"
44271"/// element in operand \\a __V2 is copied to the same position in the result.\n"
44272"/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n"
44273"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
44274"_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)\n"
44275"{\n"
44276" return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,\n"
44277" (__v4sf)__M);\n"
44278"}\n"
44279"\n"
44280"/// Returns a 128-bit vector of [16 x i8] where the values are selected\n"
44281"/// from either of the first or second operand as specified by the third\n"
44282"/// operand, the control mask.\n"
44283"///\n"
44284"/// \\headerfile <x86intrin.h>\n"
44285"///\n"
44286"/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.\n"
44287"///\n"
44288"/// \\param __V1\n"
44289"/// A 128-bit vector of [16 x i8].\n"
44290"/// \\param __V2\n"
44291"/// A 128-bit vector of [16 x i8].\n"
44292"/// \\param __M\n"
44293"/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying\n"
44294"/// how the values are to be copied. The position of the mask bit corresponds\n"
44295"/// to the most significant bit of a copied value. When a mask bit is 0, the\n"
44296"/// corresponding 8-bit element in operand \\a __V1 is copied to the same\n"
44297"/// position in the result. When a mask bit is 1, the corresponding 8-bit\n"
44298"/// element in operand \\a __V2 is copied to the same position in the result.\n"
44299"/// \\returns A 128-bit vector of [16 x i8] containing the copied values.\n"
44300"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44301"_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)\n"
44302"{\n"
44303" return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,\n"
44304" (__v16qi)__M);\n"
44305"}\n"
44306"\n"
44307"/// Returns a 128-bit vector of [8 x i16] where the values are selected\n"
44308"/// from either of the first or second operand as specified by the third\n"
44309"/// operand, the control mask.\n"
44310"///\n"
44311"/// \\headerfile <x86intrin.h>\n"
44312"///\n"
44313"/// \\code\n"
44314"/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);\n"
44315"/// \\endcode\n"
44316"///\n"
44317"/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.\n"
44318"///\n"
44319"/// \\param V1\n"
44320"/// A 128-bit vector of [8 x i16].\n"
44321"/// \\param V2\n"
44322"/// A 128-bit vector of [8 x i16].\n"
44323"/// \\param M\n"
44324"/// An immediate integer operand, with mask bits [7:0] specifying how the\n"
44325"/// values are to be copied. The position of the mask bit corresponds to the\n"
44326"/// index of a copied value. When a mask bit is 0, the corresponding 16-bit\n"
44327"/// element in operand \\a V1 is copied to the same position in the result.\n"
44328"/// When a mask bit is 1, the corresponding 16-bit element in operand \\a V2\n"
44329"/// is copied to the same position in the result.\n"
44330"/// \\returns A 128-bit vector of [8 x i16] containing the copied values.\n"
44331"#define _mm_blend_epi16(V1, V2, M) \\\n"
44332" (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \\\n"
44333" (__v8hi)(__m128i)(V2), (int)(M))\n"
44334"\n"
44335"/* SSE4 Dword Multiply Instructions. */\n"
44336"/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]\n"
44337"/// and returns the lower 32 bits of the each product in a 128-bit vector of\n"
44338"/// [4 x i32].\n"
44339"///\n"
44340"/// \\headerfile <x86intrin.h>\n"
44341"///\n"
44342"/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.\n"
44343"///\n"
44344"/// \\param __V1\n"
44345"/// A 128-bit integer vector.\n"
44346"/// \\param __V2\n"
44347"/// A 128-bit integer vector.\n"
44348"/// \\returns A 128-bit integer vector containing the products of both operands.\n"
44349"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44350"_mm_mullo_epi32 (__m128i __V1, __m128i __V2)\n"
44351"{\n"
44352" return (__m128i) ((__v4su)__V1 * (__v4su)__V2);\n"
44353"}\n"
44354"\n"
44355"/// Multiplies corresponding even-indexed elements of two 128-bit\n"
44356"/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]\n"
44357"/// containing the products.\n"
44358"///\n"
44359"/// \\headerfile <x86intrin.h>\n"
44360"///\n"
44361"/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.\n"
44362"///\n"
44363"/// \\param __V1\n"
44364"/// A 128-bit vector of [4 x i32].\n"
44365"/// \\param __V2\n"
44366"/// A 128-bit vector of [4 x i32].\n"
44367"/// \\returns A 128-bit vector of [2 x i64] containing the products of both\n"
44368"/// operands.\n"
44369"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44370"_mm_mul_epi32 (__m128i __V1, __m128i __V2)\n"
44371"{\n"
44372" return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);\n"
44373"}\n"
44374"\n"
44375"/* SSE4 Floating Point Dot Product Instructions. */\n"
44376"/// Computes the dot product of the two 128-bit vectors of [4 x float]\n"
44377"/// and returns it in the elements of the 128-bit result vector of\n"
44378"/// [4 x float].\n"
44379"///\n"
44380"/// The immediate integer operand controls which input elements\n"
44381"/// will contribute to the dot product, and where the final results are\n"
44382"/// returned.\n"
44383"///\n"
44384"/// \\headerfile <x86intrin.h>\n"
44385"///\n"
44386"/// \\code\n"
44387"/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);\n"
44388"/// \\endcode\n"
44389"///\n"
44390"/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.\n"
44391"///\n"
44392"/// \\param X\n"
44393"/// A 128-bit vector of [4 x float].\n"
44394"/// \\param Y\n"
44395"/// A 128-bit vector of [4 x float].\n"
44396"/// \\param M\n"
44397"/// An immediate integer operand. Mask bits [7:4] determine which elements\n"
44398"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
44399"/// element and bit [7] corresponding to the highest element of each [4 x\n"
44400"/// float] vector. If a bit is set, the corresponding elements from the two\n"
44401"/// input vectors are used as an input for dot product; otherwise that input\n"
44402"/// is treated as zero. Bits [3:0] determine which elements of the result\n"
44403"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
44404"/// to the lowest element and bit [3] corresponding to the highest element of\n"
44405"/// each [4 x float] subvector. If a bit is set, the dot product is returned\n"
44406"/// in the corresponding element; otherwise that element is set to zero.\n"
44407"/// \\returns A 128-bit vector of [4 x float] containing the dot product.\n"
44408"#define _mm_dp_ps(X, Y, M) \\\n"
44409" (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \\\n"
44410" (__v4sf)(__m128)(Y), (M))\n"
44411"\n"
44412"/// Computes the dot product of the two 128-bit vectors of [2 x double]\n"
44413"/// and returns it in the elements of the 128-bit result vector of\n"
44414"/// [2 x double].\n"
44415"///\n"
44416"/// The immediate integer operand controls which input\n"
44417"/// elements will contribute to the dot product, and where the final results\n"
44418"/// are returned.\n"
44419"///\n"
44420"/// \\headerfile <x86intrin.h>\n"
44421"///\n"
44422"/// \\code\n"
44423"/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);\n"
44424"/// \\endcode\n"
44425"///\n"
44426"/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.\n"
44427"///\n"
44428"/// \\param X\n"
44429"/// A 128-bit vector of [2 x double].\n"
44430"/// \\param Y\n"
44431"/// A 128-bit vector of [2 x double].\n"
44432"/// \\param M\n"
44433"/// An immediate integer operand. Mask bits [5:4] determine which elements\n"
44434"/// of the input vectors are used, with bit [4] corresponding to the lowest\n"
44435"/// element and bit [5] corresponding to the highest element of each of [2 x\n"
44436"/// double] vector. If a bit is set, the corresponding elements from the two\n"
44437"/// input vectors are used as an input for dot product; otherwise that input\n"
44438"/// is treated as zero. Bits [1:0] determine which elements of the result\n"
44439"/// will receive a copy of the final dot product, with bit [0] corresponding\n"
44440"/// to the lowest element and bit [1] corresponding to the highest element of\n"
44441"/// each [2 x double] vector. If a bit is set, the dot product is returned in\n"
44442"/// the corresponding element; otherwise that element is set to zero.\n"
44443"#define _mm_dp_pd(X, Y, M) \\\n"
44444" (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \\\n"
44445" (__v2df)(__m128d)(Y), (M))\n"
44446"\n"
44447"/* SSE4 Streaming Load Hint Instruction. */\n"
44448"/// Loads integer values from a 128-bit aligned memory location to a\n"
44449"/// 128-bit integer vector.\n"
44450"///\n"
44451"/// \\headerfile <x86intrin.h>\n"
44452"///\n"
44453"/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.\n"
44454"///\n"
44455"/// \\param __V\n"
44456"/// A pointer to a 128-bit aligned memory location that contains the integer\n"
44457"/// values.\n"
44458"/// \\returns A 128-bit integer vector containing the data stored at the\n"
44459"/// specified memory location.\n"
44460"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44461"_mm_stream_load_si128 (__m128i const *__V)\n"
44462"{\n"
44463" return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);\n"
44464"}\n"
44465"\n"
44466"/* SSE4 Packed Integer Min/Max Instructions. */\n"
44467"/// Compares the corresponding elements of two 128-bit vectors of\n"
44468"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser\n"
44469"/// of the two values.\n"
44470"///\n"
44471"/// \\headerfile <x86intrin.h>\n"
44472"///\n"
44473"/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.\n"
44474"///\n"
44475"/// \\param __V1\n"
44476"/// A 128-bit vector of [16 x i8].\n"
44477"/// \\param __V2\n"
44478"/// A 128-bit vector of [16 x i8]\n"
44479"/// \\returns A 128-bit vector of [16 x i8] containing the lesser values.\n"
44480"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44481"_mm_min_epi8 (__m128i __V1, __m128i __V2)\n"
44482"{\n"
44483" return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
44484"}\n"
44485"\n"
44486"/// Compares the corresponding elements of two 128-bit vectors of\n"
44487"/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the\n"
44488"/// greater value of the two.\n"
44489"///\n"
44490"/// \\headerfile <x86intrin.h>\n"
44491"///\n"
44492"/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.\n"
44493"///\n"
44494"/// \\param __V1\n"
44495"/// A 128-bit vector of [16 x i8].\n"
44496"/// \\param __V2\n"
44497"/// A 128-bit vector of [16 x i8].\n"
44498"/// \\returns A 128-bit vector of [16 x i8] containing the greater values.\n"
44499"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44500"_mm_max_epi8 (__m128i __V1, __m128i __V2)\n"
44501"{\n"
44502" return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);\n"
44503"}\n"
44504"\n"
44505"/// Compares the corresponding elements of two 128-bit vectors of\n"
44506"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser\n"
44507"/// value of the two.\n"
44508"///\n"
44509"/// \\headerfile <x86intrin.h>\n"
44510"///\n"
44511"/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.\n"
44512"///\n"
44513"/// \\param __V1\n"
44514"/// A 128-bit vector of [8 x u16].\n"
44515"/// \\param __V2\n"
44516"/// A 128-bit vector of [8 x u16].\n"
44517"/// \\returns A 128-bit vector of [8 x u16] containing the lesser values.\n"
44518"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44519"_mm_min_epu16 (__m128i __V1, __m128i __V2)\n"
44520"{\n"
44521" return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
44522"}\n"
44523"\n"
44524"/// Compares the corresponding elements of two 128-bit vectors of\n"
44525"/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the\n"
44526"/// greater value of the two.\n"
44527"///\n"
44528"/// \\headerfile <x86intrin.h>\n"
44529"///\n"
44530"/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.\n"
44531"///\n"
44532"/// \\param __V1\n"
44533"/// A 128-bit vector of [8 x u16].\n"
44534"/// \\param __V2\n"
44535"/// A 128-bit vector of [8 x u16].\n"
44536"/// \\returns A 128-bit vector of [8 x u16] containing the greater values.\n"
44537"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44538"_mm_max_epu16 (__m128i __V1, __m128i __V2)\n"
44539"{\n"
44540" return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);\n"
44541"}\n"
44542"\n"
44543"/// Compares the corresponding elements of two 128-bit vectors of\n"
44544"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser\n"
44545"/// value of the two.\n"
44546"///\n"
44547"/// \\headerfile <x86intrin.h>\n"
44548"///\n"
44549"/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.\n"
44550"///\n"
44551"/// \\param __V1\n"
44552"/// A 128-bit vector of [4 x i32].\n"
44553"/// \\param __V2\n"
44554"/// A 128-bit vector of [4 x i32].\n"
44555"/// \\returns A 128-bit vector of [4 x i32] containing the lesser values.\n"
44556"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44557"_mm_min_epi32 (__m128i __V1, __m128i __V2)\n"
44558"{\n"
44559" return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);\n"
44560"}\n"
44561"\n"
44562"/// Compares the corresponding elements of two 128-bit vectors of\n"
44563"/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the\n"
44564"/// greater value of the two.\n"
44565"///\n"
44566"/// \\headerfile <x86intrin.h>\n"
44567"///\n"
44568"/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.\n"
44569"///\n"
44570"/// \\param __V1\n"
44571"/// A 128-bit vector of [4 x i32].\n"
44572"/// \\param __V2\n"
44573"/// A 128-bit vector of [4 x i32].\n"
44574"/// \\returns A 128-bit vector of [4 x i32] containing the greater values.\n"
44575"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44576"_mm_max_epi32 (__m128i __V1, __m128i __V2)\n"
44577"{\n"
44578" return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);\n"
44579"}\n"
44580"\n"
44581"/// Compares the corresponding elements of two 128-bit vectors of\n"
44582"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser\n"
44583"/// value of the two.\n"
44584"///\n"
44585"/// \\headerfile <x86intrin.h>\n"
44586"///\n"
44587"/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.\n"
44588"///\n"
44589"/// \\param __V1\n"
44590"/// A 128-bit vector of [4 x u32].\n"
44591"/// \\param __V2\n"
44592"/// A 128-bit vector of [4 x u32].\n"
44593"/// \\returns A 128-bit vector of [4 x u32] containing the lesser values.\n"
44594"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44595"_mm_min_epu32 (__m128i __V1, __m128i __V2)\n"
44596"{\n"
44597" return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);\n"
44598"}\n"
44599"\n"
44600"/// Compares the corresponding elements of two 128-bit vectors of\n"
44601"/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the\n"
44602"/// greater value of the two.\n"
44603"///\n"
44604"/// \\headerfile <x86intrin.h>\n"
44605"///\n"
44606"/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.\n"
44607"///\n"
44608"/// \\param __V1\n"
44609"/// A 128-bit vector of [4 x u32].\n"
44610"/// \\param __V2\n"
44611"/// A 128-bit vector of [4 x u32].\n"
44612"/// \\returns A 128-bit vector of [4 x u32] containing the greater values.\n"
44613"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
44614"_mm_max_epu32 (__m128i __V1, __m128i __V2)\n"
44615"{\n"
44616" return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);\n"
44617"}\n"
44618"\n"
44619"/* SSE4 Insertion and Extraction from XMM Register Instructions. */\n"
44620"/// Takes the first argument \\a X and inserts an element from the second\n"
44621"/// argument \\a Y as selected by the third argument \\a N. That result then\n"
44622"/// has elements zeroed out also as selected by the third argument \\a N. The\n"
44623"/// resulting 128-bit vector of [4 x float] is then returned.\n"
44624"///\n"
44625"/// \\headerfile <x86intrin.h>\n"
44626"///\n"
44627"/// \\code\n"
44628"/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);\n"
44629"/// \\endcode\n"
44630"///\n"
44631"/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.\n"
44632"///\n"
44633"/// \\param X\n"
44634"/// A 128-bit vector source operand of [4 x float]. With the exception of\n"
44635"/// those bits in the result copied from parameter \\a Y and zeroed by bits\n"
44636"/// [3:0] of \\a N, all bits from this parameter are copied to the result.\n"
44637"/// \\param Y\n"
44638"/// A 128-bit vector source operand of [4 x float]. One single-precision\n"
44639"/// floating-point element from this source, as determined by the immediate\n"
44640"/// parameter, is copied to the result.\n"
44641"/// \\param N\n"
44642"/// Specifies which bits from operand \\a Y will be copied, which bits in the\n"
44643"/// result they will be be copied to, and which bits in the result will be\n"
44644"/// cleared. The following assignments are made: \\n\n"
44645"/// Bits [7:6] specify the bits to copy from operand \\a Y: \\n\n"
44646"/// 00: Selects bits [31:0] from operand \\a Y. \\n\n"
44647"/// 01: Selects bits [63:32] from operand \\a Y. \\n\n"
44648"/// 10: Selects bits [95:64] from operand \\a Y. \\n\n"
44649"/// 11: Selects bits [127:96] from operand \\a Y. \\n\n"
44650"/// Bits [5:4] specify the bits in the result to which the selected bits\n"
44651"/// from operand \\a Y are copied: \\n\n"
44652"/// 00: Copies the selected bits from \\a Y to result bits [31:0]. \\n\n"
44653"/// 01: Copies the selected bits from \\a Y to result bits [63:32]. \\n\n"
44654"/// 10: Copies the selected bits from \\a Y to result bits [95:64]. \\n\n"
44655"/// 11: Copies the selected bits from \\a Y to result bits [127:96]. \\n\n"
44656"/// Bits[3:0]: If any of these bits are set, the corresponding result\n"
44657"/// element is cleared.\n"
44658"/// \\returns A 128-bit vector of [4 x float] containing the copied\n"
44659"/// single-precision floating point elements from the operands.\n"
44660"#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))\n"
44661"\n"
44662"/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and\n"
44663"/// returns it, using the immediate value parameter \\a N as a selector.\n"
44664"///\n"
44665"/// \\headerfile <x86intrin.h>\n"
44666"///\n"
44667"/// \\code\n"
44668"/// int _mm_extract_ps(__m128 X, const int N);\n"
44669"/// \\endcode\n"
44670"///\n"
44671"/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>\n"
44672"/// instruction.\n"
44673"///\n"
44674"/// \\param X\n"
44675"/// A 128-bit vector of [4 x float].\n"
44676"/// \\param N\n"
44677"/// An immediate value. Bits [1:0] determines which bits from the argument\n"
44678"/// \\a X are extracted and returned: \\n\n"
44679"/// 00: Bits [31:0] of parameter \\a X are returned. \\n\n"
44680"/// 01: Bits [63:32] of parameter \\a X are returned. \\n\n"
44681"/// 10: Bits [95:64] of parameter \\a X are returned. \\n\n"
44682"/// 11: Bits [127:96] of parameter \\a X are returned.\n"
44683"/// \\returns A 32-bit integer containing the extracted 32 bits of float data.\n"
44684"#define _mm_extract_ps(X, N) (__extension__ \\\n"
44685" ({ union { int __i; float __f; } __t; \\\n"
44686" __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \\\n"
44687" __t.__i;}))\n"
44688"\n"
44689"/* Miscellaneous insert and extract macros. */\n"
44690"/* Extract a single-precision float from X at index N into D. */\n"
44691"#define _MM_EXTRACT_FLOAT(D, X, N) \\\n"
44692" { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }\n"
44693"\n"
44694"/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create\n"
44695" an index suitable for _mm_insert_ps. */\n"
44696"#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))\n"
44697"\n"
44698"/* Extract a float from X at index N into the first index of the return. */\n"
44699"#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \\\n"
44700" _MM_MK_INSERTPS_NDX((N), 0, 0x0e))\n"
44701"\n"
44702"/* Insert int into packed integer array at index. */\n"
44703"/// Constructs a 128-bit vector of [16 x i8] by first making a copy of\n"
44704"/// the 128-bit integer vector parameter, and then inserting the lower 8 bits\n"
44705"/// of an integer parameter \\a I into an offset specified by the immediate\n"
44706"/// value parameter \\a N.\n"
44707"///\n"
44708"/// \\headerfile <x86intrin.h>\n"
44709"///\n"
44710"/// \\code\n"
44711"/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);\n"
44712"/// \\endcode\n"
44713"///\n"
44714"/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.\n"
44715"///\n"
44716"/// \\param X\n"
44717"/// A 128-bit integer vector of [16 x i8]. This vector is copied to the\n"
44718"/// result and then one of the sixteen elements in the result vector is\n"
44719"/// replaced by the lower 8 bits of \\a I.\n"
44720"/// \\param I\n"
44721"/// An integer. The lower 8 bits of this operand are written to the result\n"
44722"/// beginning at the offset specified by \\a N.\n"
44723"/// \\param N\n"
44724"/// An immediate value. Bits [3:0] specify the bit offset in the result at\n"
44725"/// which the lower 8 bits of \\a I are written. \\n\n"
44726"/// 0000: Bits [7:0] of the result are used for insertion. \\n\n"
44727"/// 0001: Bits [15:8] of the result are used for insertion. \\n\n"
44728"/// 0010: Bits [23:16] of the result are used for insertion. \\n\n"
44729"/// 0011: Bits [31:24] of the result are used for insertion. \\n\n"
44730"/// 0100: Bits [39:32] of the result are used for insertion. \\n\n"
44731"/// 0101: Bits [47:40] of the result are used for insertion. \\n\n"
44732"/// 0110: Bits [55:48] of the result are used for insertion. \\n\n"
44733"/// 0111: Bits [63:56] of the result are used for insertion. \\n\n"
44734"/// 1000: Bits [71:64] of the result are used for insertion. \\n\n"
44735"/// 1001: Bits [79:72] of the result are used for insertion. \\n\n"
44736"/// 1010: Bits [87:80] of the result are used for insertion. \\n\n"
44737"/// 1011: Bits [95:88] of the result are used for insertion. \\n\n"
44738"/// 1100: Bits [103:96] of the result are used for insertion. \\n\n"
44739"/// 1101: Bits [111:104] of the result are used for insertion. \\n\n"
44740"/// 1110: Bits [119:112] of the result are used for insertion. \\n\n"
44741"/// 1111: Bits [127:120] of the result are used for insertion.\n"
44742"/// \\returns A 128-bit integer vector containing the constructed values.\n"
44743"#define _mm_insert_epi8(X, I, N) \\\n"
44744" (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \\\n"
44745" (int)(I), (int)(N))\n"
44746"\n"
44747"/// Constructs a 128-bit vector of [4 x i32] by first making a copy of\n"
44748"/// the 128-bit integer vector parameter, and then inserting the 32-bit\n"
44749"/// integer parameter \\a I at the offset specified by the immediate value\n"
44750"/// parameter \\a N.\n"
44751"///\n"
44752"/// \\headerfile <x86intrin.h>\n"
44753"///\n"
44754"/// \\code\n"
44755"/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);\n"
44756"/// \\endcode\n"
44757"///\n"
44758"/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.\n"
44759"///\n"
44760"/// \\param X\n"
44761"/// A 128-bit integer vector of [4 x i32]. This vector is copied to the\n"
44762"/// result and then one of the four elements in the result vector is\n"
44763"/// replaced by \\a I.\n"
44764"/// \\param I\n"
44765"/// A 32-bit integer that is written to the result beginning at the offset\n"
44766"/// specified by \\a N.\n"
44767"/// \\param N\n"
44768"/// An immediate value. Bits [1:0] specify the bit offset in the result at\n"
44769"/// which the integer \\a I is written. \\n\n"
44770"/// 00: Bits [31:0] of the result are used for insertion. \\n\n"
44771"/// 01: Bits [63:32] of the result are used for insertion. \\n\n"
44772"/// 10: Bits [95:64] of the result are used for insertion. \\n\n"
44773"/// 11: Bits [127:96] of the result are used for insertion.\n"
44774"/// \\returns A 128-bit integer vector containing the constructed values.\n"
44775"#define _mm_insert_epi32(X, I, N) \\\n"
44776" (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \\\n"
44777" (int)(I), (int)(N))\n"
44778"\n"
44779"#ifdef __x86_64__\n"
44780"/// Constructs a 128-bit vector of [2 x i64] by first making a copy of\n"
44781"/// the 128-bit integer vector parameter, and then inserting the 64-bit\n"
44782"/// integer parameter \\a I, using the immediate value parameter \\a N as an\n"
44783"/// insertion location selector.\n"
44784"///\n"
44785"/// \\headerfile <x86intrin.h>\n"
44786"///\n"
44787"/// \\code\n"
44788"/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);\n"
44789"/// \\endcode\n"
44790"///\n"
44791"/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.\n"
44792"///\n"
44793"/// \\param X\n"
44794"/// A 128-bit integer vector of [2 x i64]. This vector is copied to the\n"
44795"/// result and then one of the two elements in the result vector is replaced\n"
44796"/// by \\a I.\n"
44797"/// \\param I\n"
44798"/// A 64-bit integer that is written to the result beginning at the offset\n"
44799"/// specified by \\a N.\n"
44800"/// \\param N\n"
44801"/// An immediate value. Bit [0] specifies the bit offset in the result at\n"
44802"/// which the integer \\a I is written. \\n\n"
44803"/// 0: Bits [63:0] of the result are used for insertion. \\n\n"
44804"/// 1: Bits [127:64] of the result are used for insertion. \\n\n"
44805"/// \\returns A 128-bit integer vector containing the constructed values.\n"
44806"#define _mm_insert_epi64(X, I, N) \\\n"
44807" (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \\\n"
44808" (long long)(I), (int)(N))\n"
44809"#endif /* __x86_64__ */\n"
44810"\n"
44811"/* Extract int from packed integer array at index. This returns the element\n"
44812" * as a zero extended value, so it is unsigned.\n"
44813" */\n"
44814"/// Extracts an 8-bit element from the 128-bit integer vector of\n"
44815"/// [16 x i8], using the immediate value parameter \\a N as a selector.\n"
44816"///\n"
44817"/// \\headerfile <x86intrin.h>\n"
44818"///\n"
44819"/// \\code\n"
44820"/// int _mm_extract_epi8(__m128i X, const int N);\n"
44821"/// \\endcode\n"
44822"///\n"
44823"/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.\n"
44824"///\n"
44825"/// \\param X\n"
44826"/// A 128-bit integer vector.\n"
44827"/// \\param N\n"
44828"/// An immediate value. Bits [3:0] specify which 8-bit vector element from\n"
44829"/// the argument \\a X to extract and copy to the result. \\n\n"
44830"/// 0000: Bits [7:0] of parameter \\a X are extracted. \\n\n"
44831"/// 0001: Bits [15:8] of the parameter \\a X are extracted. \\n\n"
44832"/// 0010: Bits [23:16] of the parameter \\a X are extracted. \\n\n"
44833"/// 0011: Bits [31:24] of the parameter \\a X are extracted. \\n\n"
44834"/// 0100: Bits [39:32] of the parameter \\a X are extracted. \\n\n"
44835"/// 0101: Bits [47:40] of the parameter \\a X are extracted. \\n\n"
44836"/// 0110: Bits [55:48] of the parameter \\a X are extracted. \\n\n"
44837"/// 0111: Bits [63:56] of the parameter \\a X are extracted. \\n\n"
44838"/// 1000: Bits [71:64] of the parameter \\a X are extracted. \\n\n"
44839"/// 1001: Bits [79:72] of the parameter \\a X are extracted. \\n\n"
44840"/// 1010: Bits [87:80] of the parameter \\a X are extracted. \\n\n"
44841"/// 1011: Bits [95:88] of the parameter \\a X are extracted. \\n\n"
44842"/// 1100: Bits [103:96] of the parameter \\a X are extracted. \\n\n"
44843"/// 1101: Bits [111:104] of the parameter \\a X are extracted. \\n\n"
44844"/// 1110: Bits [119:112] of the parameter \\a X are extracted. \\n\n"
44845"/// 1111: Bits [127:120] of the parameter \\a X are extracted.\n"
44846"/// \\returns An unsigned integer, whose lower 8 bits are selected from the\n"
44847"/// 128-bit integer vector parameter and the remaining bits are assigned\n"
44848"/// zeros.\n"
44849"#define _mm_extract_epi8(X, N) \\\n"
44850" (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \\\n"
44851" (int)(N))\n"
44852"\n"
44853"/// Extracts a 32-bit element from the 128-bit integer vector of\n"
44854"/// [4 x i32], using the immediate value parameter \\a N as a selector.\n"
44855"///\n"
44856"/// \\headerfile <x86intrin.h>\n"
44857"///\n"
44858"/// \\code\n"
44859"/// int _mm_extract_epi32(__m128i X, const int N);\n"
44860"/// \\endcode\n"
44861"///\n"
44862"/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.\n"
44863"///\n"
44864"/// \\param X\n"
44865"/// A 128-bit integer vector.\n"
44866"/// \\param N\n"
44867"/// An immediate value. Bits [1:0] specify which 32-bit vector element from\n"
44868"/// the argument \\a X to extract and copy to the result. \\n\n"
44869"/// 00: Bits [31:0] of the parameter \\a X are extracted. \\n\n"
44870"/// 01: Bits [63:32] of the parameter \\a X are extracted. \\n\n"
44871"/// 10: Bits [95:64] of the parameter \\a X are extracted. \\n\n"
44872"/// 11: Bits [127:96] of the parameter \\a X are exracted.\n"
44873"/// \\returns An integer, whose lower 32 bits are selected from the 128-bit\n"
44874"/// integer vector parameter and the remaining bits are assigned zeros.\n"
44875"#define _mm_extract_epi32(X, N) \\\n"
44876" (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))\n"
44877"\n"
44878"#ifdef __x86_64__\n"
44879"/// Extracts a 64-bit element from the 128-bit integer vector of\n"
44880"/// [2 x i64], using the immediate value parameter \\a N as a selector.\n"
44881"///\n"
44882"/// \\headerfile <x86intrin.h>\n"
44883"///\n"
44884"/// \\code\n"
44885"/// long long _mm_extract_epi64(__m128i X, const int N);\n"
44886"/// \\endcode\n"
44887"///\n"
44888"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
44889"///\n"
44890"/// \\param X\n"
44891"/// A 128-bit integer vector.\n"
44892"/// \\param N\n"
44893"/// An immediate value. Bit [0] specifies which 64-bit vector element from\n"
44894"/// the argument \\a X to return. \\n\n"
44895"/// 0: Bits [63:0] are returned. \\n\n"
44896"/// 1: Bits [127:64] are returned. \\n\n"
44897"/// \\returns A 64-bit integer.\n"
44898"#define _mm_extract_epi64(X, N) \\\n"
44899" (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))\n"
44900"#endif /* __x86_64 */\n"
44901"\n"
44902"/* SSE4 128-bit Packed Integer Comparisons. */\n"
44903"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
44904"/// zeros.\n"
44905"///\n"
44906"/// \\headerfile <x86intrin.h>\n"
44907"///\n"
44908"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
44909"///\n"
44910"/// \\param __M\n"
44911"/// A 128-bit integer vector containing the bits to be tested.\n"
44912"/// \\param __V\n"
44913"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
44914"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
44915"static __inline__ int __DEFAULT_FN_ATTRS\n"
44916"_mm_testz_si128(__m128i __M, __m128i __V)\n"
44917"{\n"
44918" return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);\n"
44919"}\n"
44920"\n"
44921"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
44922"/// ones.\n"
44923"///\n"
44924"/// \\headerfile <x86intrin.h>\n"
44925"///\n"
44926"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
44927"///\n"
44928"/// \\param __M\n"
44929"/// A 128-bit integer vector containing the bits to be tested.\n"
44930"/// \\param __V\n"
44931"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
44932"/// \\returns TRUE if the specified bits are all ones; FALSE otherwise.\n"
44933"static __inline__ int __DEFAULT_FN_ATTRS\n"
44934"_mm_testc_si128(__m128i __M, __m128i __V)\n"
44935"{\n"
44936" return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);\n"
44937"}\n"
44938"\n"
44939"/// Tests whether the specified bits in a 128-bit integer vector are\n"
44940"/// neither all zeros nor all ones.\n"
44941"///\n"
44942"/// \\headerfile <x86intrin.h>\n"
44943"///\n"
44944"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
44945"///\n"
44946"/// \\param __M\n"
44947"/// A 128-bit integer vector containing the bits to be tested.\n"
44948"/// \\param __V\n"
44949"/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n"
44950"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
44951"/// FALSE otherwise.\n"
44952"static __inline__ int __DEFAULT_FN_ATTRS\n"
44953"_mm_testnzc_si128(__m128i __M, __m128i __V)\n"
44954"{\n"
44955" return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);\n"
44956"}\n"
44957"\n"
44958"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
44959"/// ones.\n"
44960"///\n"
44961"/// \\headerfile <x86intrin.h>\n"
44962"///\n"
44963"/// \\code\n"
44964"/// int _mm_test_all_ones(__m128i V);\n"
44965"/// \\endcode\n"
44966"///\n"
44967"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
44968"///\n"
44969"/// \\param V\n"
44970"/// A 128-bit integer vector containing the bits to be tested.\n"
44971"/// \\returns TRUE if the bits specified in the operand are all set to 1; FALSE\n"
44972"/// otherwise.\n"
44973"#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))\n"
44974"\n"
44975"/// Tests whether the specified bits in a 128-bit integer vector are\n"
44976"/// neither all zeros nor all ones.\n"
44977"///\n"
44978"/// \\headerfile <x86intrin.h>\n"
44979"///\n"
44980"/// \\code\n"
44981"/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);\n"
44982"/// \\endcode\n"
44983"///\n"
44984"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
44985"///\n"
44986"/// \\param M\n"
44987"/// A 128-bit integer vector containing the bits to be tested.\n"
44988"/// \\param V\n"
44989"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
44990"/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n"
44991"/// FALSE otherwise.\n"
44992"#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))\n"
44993"\n"
44994"/// Tests whether the specified bits in a 128-bit integer vector are all\n"
44995"/// zeros.\n"
44996"///\n"
44997"/// \\headerfile <x86intrin.h>\n"
44998"///\n"
44999"/// \\code\n"
45000"/// int _mm_test_all_zeros(__m128i M, __m128i V);\n"
45001"/// \\endcode\n"
45002"///\n"
45003"/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n"
45004"///\n"
45005"/// \\param M\n"
45006"/// A 128-bit integer vector containing the bits to be tested.\n"
45007"/// \\param V\n"
45008"/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n"
45009"/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n"
45010"#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))\n"
45011"\n"
45012"/* SSE4 64-bit Packed Integer Comparisons. */\n"
45013"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
45014"/// integer vectors for equality.\n"
45015"///\n"
45016"/// \\headerfile <x86intrin.h>\n"
45017"///\n"
45018"/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.\n"
45019"///\n"
45020"/// \\param __V1\n"
45021"/// A 128-bit integer vector.\n"
45022"/// \\param __V2\n"
45023"/// A 128-bit integer vector.\n"
45024"/// \\returns A 128-bit integer vector containing the comparison results.\n"
45025"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45026"_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)\n"
45027"{\n"
45028" return (__m128i)((__v2di)__V1 == (__v2di)__V2);\n"
45029"}\n"
45030"\n"
45031"/* SSE4 Packed Integer Sign-Extension. */\n"
45032"/// Sign-extends each of the lower eight 8-bit integer elements of a\n"
45033"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
45034"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
45035"/// are unused.\n"
45036"///\n"
45037"/// \\headerfile <x86intrin.h>\n"
45038"///\n"
45039"/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.\n"
45040"///\n"
45041"/// \\param __V\n"
45042"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-\n"
45043"/// extended to 16-bit values.\n"
45044"/// \\returns A 128-bit vector of [8 x i16] containing the sign-extended values.\n"
45045"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45046"_mm_cvtepi8_epi16(__m128i __V)\n"
45047"{\n"
45048" /* This function always performs a signed extension, but __v16qi is a char\n"
45049" which may be signed or unsigned, so use __v16qs. */\n"
45050" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
45051"}\n"
45052"\n"
45053"/// Sign-extends each of the lower four 8-bit integer elements of a\n"
45054"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
45055"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
45056"/// vector are unused.\n"
45057"///\n"
45058"/// \\headerfile <x86intrin.h>\n"
45059"///\n"
45060"/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.\n"
45061"///\n"
45062"/// \\param __V\n"
45063"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
45064"/// sign-extended to 32-bit values.\n"
45065"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
45066"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45067"_mm_cvtepi8_epi32(__m128i __V)\n"
45068"{\n"
45069" /* This function always performs a signed extension, but __v16qi is a char\n"
45070" which may be signed or unsigned, so use __v16qs. */\n"
45071" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);\n"
45072"}\n"
45073"\n"
45074"/// Sign-extends each of the lower two 8-bit integer elements of a\n"
45075"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
45076"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
45077"/// vector are unused.\n"
45078"///\n"
45079"/// \\headerfile <x86intrin.h>\n"
45080"///\n"
45081"/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.\n"
45082"///\n"
45083"/// \\param __V\n"
45084"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
45085"/// sign-extended to 64-bit values.\n"
45086"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
45087"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45088"_mm_cvtepi8_epi64(__m128i __V)\n"
45089"{\n"
45090" /* This function always performs a signed extension, but __v16qi is a char\n"
45091" which may be signed or unsigned, so use __v16qs. */\n"
45092" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);\n"
45093"}\n"
45094"\n"
45095"/// Sign-extends each of the lower four 16-bit integer elements of a\n"
45096"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
45097"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
45098"/// vector are unused.\n"
45099"///\n"
45100"/// \\headerfile <x86intrin.h>\n"
45101"///\n"
45102"/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.\n"
45103"///\n"
45104"/// \\param __V\n"
45105"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
45106"/// sign-extended to 32-bit values.\n"
45107"/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n"
45108"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45109"_mm_cvtepi16_epi32(__m128i __V)\n"
45110"{\n"
45111" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);\n"
45112"}\n"
45113"\n"
45114"/// Sign-extends each of the lower two 16-bit integer elements of a\n"
45115"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
45116"/// a 128-bit vector of [2 x i64]. The upper six elements of the input\n"
45117"/// vector are unused.\n"
45118"///\n"
45119"/// \\headerfile <x86intrin.h>\n"
45120"///\n"
45121"/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.\n"
45122"///\n"
45123"/// \\param __V\n"
45124"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
45125"/// sign-extended to 64-bit values.\n"
45126"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
45127"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45128"_mm_cvtepi16_epi64(__m128i __V)\n"
45129"{\n"
45130" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);\n"
45131"}\n"
45132"\n"
45133"/// Sign-extends each of the lower two 32-bit integer elements of a\n"
45134"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
45135"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
45136"/// are unused.\n"
45137"///\n"
45138"/// \\headerfile <x86intrin.h>\n"
45139"///\n"
45140"/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.\n"
45141"///\n"
45142"/// \\param __V\n"
45143"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
45144"/// sign-extended to 64-bit values.\n"
45145"/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n"
45146"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45147"_mm_cvtepi32_epi64(__m128i __V)\n"
45148"{\n"
45149" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);\n"
45150"}\n"
45151"\n"
45152"/* SSE4 Packed Integer Zero-Extension. */\n"
45153"/// Zero-extends each of the lower eight 8-bit integer elements of a\n"
45154"/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n"
45155"/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n"
45156"/// are unused.\n"
45157"///\n"
45158"/// \\headerfile <x86intrin.h>\n"
45159"///\n"
45160"/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.\n"
45161"///\n"
45162"/// \\param __V\n"
45163"/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are\n"
45164"/// zero-extended to 16-bit values.\n"
45165"/// \\returns A 128-bit vector of [8 x i16] containing the zero-extended values.\n"
45166"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45167"_mm_cvtepu8_epi16(__m128i __V)\n"
45168"{\n"
45169" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n"
45170"}\n"
45171"\n"
45172"/// Zero-extends each of the lower four 8-bit integer elements of a\n"
45173"/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n"
45174"/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n"
45175"/// vector are unused.\n"
45176"///\n"
45177"/// \\headerfile <x86intrin.h>\n"
45178"///\n"
45179"/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.\n"
45180"///\n"
45181"/// \\param __V\n"
45182"/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n"
45183"/// zero-extended to 32-bit values.\n"
45184"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
45185"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45186"_mm_cvtepu8_epi32(__m128i __V)\n"
45187"{\n"
45188" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);\n"
45189"}\n"
45190"\n"
45191"/// Zero-extends each of the lower two 8-bit integer elements of a\n"
45192"/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n"
45193"/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n"
45194"/// vector are unused.\n"
45195"///\n"
45196"/// \\headerfile <x86intrin.h>\n"
45197"///\n"
45198"/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.\n"
45199"///\n"
45200"/// \\param __V\n"
45201"/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n"
45202"/// zero-extended to 64-bit values.\n"
45203"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
45204"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45205"_mm_cvtepu8_epi64(__m128i __V)\n"
45206"{\n"
45207" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);\n"
45208"}\n"
45209"\n"
45210"/// Zero-extends each of the lower four 16-bit integer elements of a\n"
45211"/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n"
45212"/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n"
45213"/// vector are unused.\n"
45214"///\n"
45215"/// \\headerfile <x86intrin.h>\n"
45216"///\n"
45217"/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.\n"
45218"///\n"
45219"/// \\param __V\n"
45220"/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n"
45221"/// zero-extended to 32-bit values.\n"
45222"/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n"
45223"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45224"_mm_cvtepu16_epi32(__m128i __V)\n"
45225"{\n"
45226" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);\n"
45227"}\n"
45228"\n"
45229"/// Zero-extends each of the lower two 16-bit integer elements of a\n"
45230"/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n"
45231"/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector\n"
45232"/// are unused.\n"
45233"///\n"
45234"/// \\headerfile <x86intrin.h>\n"
45235"///\n"
45236"/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.\n"
45237"///\n"
45238"/// \\param __V\n"
45239"/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n"
45240"/// zero-extended to 64-bit values.\n"
45241"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
45242"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45243"_mm_cvtepu16_epi64(__m128i __V)\n"
45244"{\n"
45245" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);\n"
45246"}\n"
45247"\n"
45248"/// Zero-extends each of the lower two 32-bit integer elements of a\n"
45249"/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n"
45250"/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n"
45251"/// are unused.\n"
45252"///\n"
45253"/// \\headerfile <x86intrin.h>\n"
45254"///\n"
45255"/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.\n"
45256"///\n"
45257"/// \\param __V\n"
45258"/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n"
45259"/// zero-extended to 64-bit values.\n"
45260"/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n"
45261"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45262"_mm_cvtepu32_epi64(__m128i __V)\n"
45263"{\n"
45264" return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);\n"
45265"}\n"
45266"\n"
45267"/* SSE4 Pack with Unsigned Saturation. */\n"
45268"/// Converts 32-bit signed integers from both 128-bit integer vector\n"
45269"/// operands into 16-bit unsigned integers, and returns the packed result.\n"
45270"/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than\n"
45271"/// 0x0000 are saturated to 0x0000.\n"
45272"///\n"
45273"/// \\headerfile <x86intrin.h>\n"
45274"///\n"
45275"/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.\n"
45276"///\n"
45277"/// \\param __V1\n"
45278"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
45279"/// signed integer and is converted to a 16-bit unsigned integer with\n"
45280"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
45281"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
45282"/// are written to the lower 64 bits of the result.\n"
45283"/// \\param __V2\n"
45284"/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n"
45285"/// signed integer and is converted to a 16-bit unsigned integer with\n"
45286"/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n"
45287"/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n"
45288"/// are written to the higher 64 bits of the result.\n"
45289"/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n"
45290"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45291"_mm_packus_epi32(__m128i __V1, __m128i __V2)\n"
45292"{\n"
45293" return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);\n"
45294"}\n"
45295"\n"
45296"/* SSE4 Multiple Packed Sums of Absolute Difference. */\n"
45297"/// Subtracts 8-bit unsigned integer values and computes the absolute\n"
45298"/// values of the differences to the corresponding bits in the destination.\n"
45299"/// Then sums of the absolute differences are returned according to the bit\n"
45300"/// fields in the immediate operand.\n"
45301"///\n"
45302"/// \\headerfile <x86intrin.h>\n"
45303"///\n"
45304"/// \\code\n"
45305"/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);\n"
45306"/// \\endcode\n"
45307"///\n"
45308"/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.\n"
45309"///\n"
45310"/// \\param X\n"
45311"/// A 128-bit vector of [16 x i8].\n"
45312"/// \\param Y\n"
45313"/// A 128-bit vector of [16 x i8].\n"
45314"/// \\param M\n"
45315"/// An 8-bit immediate operand specifying how the absolute differences are to\n"
45316"/// be calculated, according to the following algorithm:\n"
45317"/// \\code\n"
45318"/// // M2 represents bit 2 of the immediate operand\n"
45319"/// // M10 represents bits [1:0] of the immediate operand\n"
45320"/// i = M2 * 4;\n"
45321"/// j = M10 * 4;\n"
45322"/// for (k = 0; k < 8; k = k + 1) {\n"
45323"/// d0 = abs(X[i + k + 0] - Y[j + 0]);\n"
45324"/// d1 = abs(X[i + k + 1] - Y[j + 1]);\n"
45325"/// d2 = abs(X[i + k + 2] - Y[j + 2]);\n"
45326"/// d3 = abs(X[i + k + 3] - Y[j + 3]);\n"
45327"/// r[k] = d0 + d1 + d2 + d3;\n"
45328"/// }\n"
45329"/// \\endcode\n"
45330"/// \\returns A 128-bit integer vector containing the sums of the sets of\n"
45331"/// absolute differences between both operands.\n"
45332"#define _mm_mpsadbw_epu8(X, Y, M) \\\n"
45333" (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \\\n"
45334" (__v16qi)(__m128i)(Y), (M))\n"
45335"\n"
45336"/// Finds the minimum unsigned 16-bit element in the input 128-bit\n"
45337"/// vector of [8 x u16] and returns it and along with its index.\n"
45338"///\n"
45339"/// \\headerfile <x86intrin.h>\n"
45340"///\n"
45341"/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>\n"
45342"/// instruction.\n"
45343"///\n"
45344"/// \\param __V\n"
45345"/// A 128-bit vector of [8 x u16].\n"
45346"/// \\returns A 128-bit value where bits [15:0] contain the minimum value found\n"
45347"/// in parameter \\a __V, bits [18:16] contain the index of the minimum value\n"
45348"/// and the remaining bits are set to 0.\n"
45349"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
45350"_mm_minpos_epu16(__m128i __V)\n"
45351"{\n"
45352" return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);\n"
45353"}\n"
45354"\n"
45355"/* Handle the sse4.2 definitions here. */\n"
45356"\n"
45357"/* These definitions are normally in nmmintrin.h, but gcc puts them in here\n"
45358" so we'll do the same. */\n"
45359"\n"
45360"#undef __DEFAULT_FN_ATTRS\n"
45361"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.2\")))\n"
45362"\n"
45363"/* These specify the type of data that we're comparing. */\n"
45364"#define _SIDD_UBYTE_OPS 0x00\n"
45365"#define _SIDD_UWORD_OPS 0x01\n"
45366"#define _SIDD_SBYTE_OPS 0x02\n"
45367"#define _SIDD_SWORD_OPS 0x03\n"
45368"\n"
45369"/* These specify the type of comparison operation. */\n"
45370"#define _SIDD_CMP_EQUAL_ANY 0x00\n"
45371"#define _SIDD_CMP_RANGES 0x04\n"
45372"#define _SIDD_CMP_EQUAL_EACH 0x08\n"
45373"#define _SIDD_CMP_EQUAL_ORDERED 0x0c\n"
45374"\n"
45375"/* These macros specify the polarity of the operation. */\n"
45376"#define _SIDD_POSITIVE_POLARITY 0x00\n"
45377"#define _SIDD_NEGATIVE_POLARITY 0x10\n"
45378"#define _SIDD_MASKED_POSITIVE_POLARITY 0x20\n"
45379"#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30\n"
45380"\n"
45381"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
45382"#define _SIDD_LEAST_SIGNIFICANT 0x00\n"
45383"#define _SIDD_MOST_SIGNIFICANT 0x40\n"
45384"\n"
45385"/* These macros are used in _mm_cmpXstri() to specify the return. */\n"
45386"#define _SIDD_BIT_MASK 0x00\n"
45387"#define _SIDD_UNIT_MASK 0x40\n"
45388"\n"
45389"/* SSE4.2 Packed Comparison Intrinsics. */\n"
45390"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45391"/// data with implicitly defined lengths that is contained in source operands\n"
45392"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
45393"/// mask of the comparison.\n"
45394"///\n"
45395"/// \\headerfile <x86intrin.h>\n"
45396"///\n"
45397"/// \\code\n"
45398"/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);\n"
45399"/// \\endcode\n"
45400"///\n"
45401"/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>\n"
45402"/// instruction.\n"
45403"///\n"
45404"/// \\param A\n"
45405"/// A 128-bit integer vector containing one of the source operands to be\n"
45406"/// compared.\n"
45407"/// \\param B\n"
45408"/// A 128-bit integer vector containing one of the source operands to be\n"
45409"/// compared.\n"
45410"/// \\param M\n"
45411"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45412"/// words, the type of comparison to perform, and the format of the return\n"
45413"/// value. \\n\n"
45414"/// Bits [1:0]: Determine source data format. \\n\n"
45415"/// 00: 16 unsigned bytes \\n\n"
45416"/// 01: 8 unsigned words \\n\n"
45417"/// 10: 16 signed bytes \\n\n"
45418"/// 11: 8 signed words \\n\n"
45419"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45420"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45421"/// the characters in \\a A. \\n\n"
45422"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45423"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45424"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45425"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45426"/// \\a B for equality. \\n\n"
45427"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45428"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45429"/// mask of the comparison results. \\n\n"
45430"/// 00: No effect. \\n\n"
45431"/// 01: Negate the bit mask. \\n\n"
45432"/// 10: No effect. \\n\n"
45433"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45434"/// to the size of \\a A or \\a B. \\n\n"
45435"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
45436"/// bytes. \\n\n"
45437"/// 0: The result is zero-extended to 16 bytes. \\n\n"
45438"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
45439"/// repeating each bit 8 or 16 times).\n"
45440"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
45441"/// the comparison.\n"
45442"#define _mm_cmpistrm(A, B, M) \\\n"
45443" (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \\\n"
45444" (__v16qi)(__m128i)(B), (int)(M))\n"
45445"\n"
45446"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45447"/// data with implicitly defined lengths that is contained in source operands\n"
45448"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
45449"/// comparison.\n"
45450"///\n"
45451"/// \\headerfile <x86intrin.h>\n"
45452"///\n"
45453"/// \\code\n"
45454"/// int _mm_cmpistri(__m128i A, __m128i B, const int M);\n"
45455"/// \\endcode\n"
45456"///\n"
45457"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45458"/// instruction.\n"
45459"///\n"
45460"/// \\param A\n"
45461"/// A 128-bit integer vector containing one of the source operands to be\n"
45462"/// compared.\n"
45463"/// \\param B\n"
45464"/// A 128-bit integer vector containing one of the source operands to be\n"
45465"/// compared.\n"
45466"/// \\param M\n"
45467"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45468"/// words, the type of comparison to perform, and the format of the return\n"
45469"/// value. \\n\n"
45470"/// Bits [1:0]: Determine source data format. \\n\n"
45471"/// 00: 16 unsigned bytes \\n\n"
45472"/// 01: 8 unsigned words \\n\n"
45473"/// 10: 16 signed bytes \\n\n"
45474"/// 11: 8 signed words \\n\n"
45475"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45476"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45477"/// the characters in \\a A. \\n\n"
45478"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45479"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45480"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45481"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45482"/// \\a B for equality. \\n\n"
45483"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
45484"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45485"/// mask of the comparison results. \\n\n"
45486"/// 00: No effect. \\n\n"
45487"/// 01: Negate the bit mask. \\n\n"
45488"/// 10: No effect. \\n\n"
45489"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45490"/// to the size of \\a A or \\a B. \\n\n"
45491"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
45492"/// highest set bit is returned. \\n\n"
45493"/// 0: The index of the least significant set bit. \\n\n"
45494"/// 1: The index of the most significant set bit. \\n\n"
45495"/// \\returns Returns an integer representing the result index of the comparison.\n"
45496"#define _mm_cmpistri(A, B, M) \\\n"
45497" (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \\\n"
45498" (__v16qi)(__m128i)(B), (int)(M))\n"
45499"\n"
45500"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45501"/// data with explicitly defined lengths that is contained in source operands\n"
45502"/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n"
45503"/// mask of the comparison.\n"
45504"///\n"
45505"/// \\headerfile <x86intrin.h>\n"
45506"///\n"
45507"/// \\code\n"
45508"/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);\n"
45509"/// \\endcode\n"
45510"///\n"
45511"/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>\n"
45512"/// instruction.\n"
45513"///\n"
45514"/// \\param A\n"
45515"/// A 128-bit integer vector containing one of the source operands to be\n"
45516"/// compared.\n"
45517"/// \\param LA\n"
45518"/// An integer that specifies the length of the string in \\a A.\n"
45519"/// \\param B\n"
45520"/// A 128-bit integer vector containing one of the source operands to be\n"
45521"/// compared.\n"
45522"/// \\param LB\n"
45523"/// An integer that specifies the length of the string in \\a B.\n"
45524"/// \\param M\n"
45525"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45526"/// words, the type of comparison to perform, and the format of the return\n"
45527"/// value. \\n\n"
45528"/// Bits [1:0]: Determine source data format. \\n\n"
45529"/// 00: 16 unsigned bytes \\n\n"
45530"/// 01: 8 unsigned words \\n\n"
45531"/// 10: 16 signed bytes \\n\n"
45532"/// 11: 8 signed words \\n\n"
45533"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45534"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45535"/// the characters in \\a A. \\n\n"
45536"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45537"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45538"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45539"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45540"/// \\a B for equality. \\n\n"
45541"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45542"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45543"/// mask of the comparison results. \\n\n"
45544"/// 00: No effect. \\n\n"
45545"/// 01: Negate the bit mask. \\n\n"
45546"/// 10: No effect. \\n\n"
45547"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45548"/// to the size of \\a A or \\a B. \\n\n"
45549"/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n"
45550"/// bytes. \\n\n"
45551"/// 0: The result is zero-extended to 16 bytes. \\n\n"
45552"/// 1: The result is expanded to 16 bytes (this expansion is performed by\n"
45553"/// repeating each bit 8 or 16 times). \\n\n"
45554"/// \\returns Returns a 128-bit integer vector representing the result mask of\n"
45555"/// the comparison.\n"
45556"#define _mm_cmpestrm(A, LA, B, LB, M) \\\n"
45557" (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
45558" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
45559" (int)(M))\n"
45560"\n"
45561"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45562"/// data with explicitly defined lengths that is contained in source operands\n"
45563"/// \\a A and \\a B. Returns an integer representing the result index of the\n"
45564"/// comparison.\n"
45565"///\n"
45566"/// \\headerfile <x86intrin.h>\n"
45567"///\n"
45568"/// \\code\n"
45569"/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);\n"
45570"/// \\endcode\n"
45571"///\n"
45572"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
45573"/// instruction.\n"
45574"///\n"
45575"/// \\param A\n"
45576"/// A 128-bit integer vector containing one of the source operands to be\n"
45577"/// compared.\n"
45578"/// \\param LA\n"
45579"/// An integer that specifies the length of the string in \\a A.\n"
45580"/// \\param B\n"
45581"/// A 128-bit integer vector containing one of the source operands to be\n"
45582"/// compared.\n"
45583"/// \\param LB\n"
45584"/// An integer that specifies the length of the string in \\a B.\n"
45585"/// \\param M\n"
45586"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45587"/// words, the type of comparison to perform, and the format of the return\n"
45588"/// value. \\n\n"
45589"/// Bits [1:0]: Determine source data format. \\n\n"
45590"/// 00: 16 unsigned bytes \\n\n"
45591"/// 01: 8 unsigned words \\n\n"
45592"/// 10: 16 signed bytes \\n\n"
45593"/// 11: 8 signed words \\n\n"
45594"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45595"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45596"/// the characters in \\a A. \\n\n"
45597"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45598"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45599"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45600"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45601"/// \\a B for equality. \\n\n"
45602"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
45603"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45604"/// mask of the comparison results. \\n\n"
45605"/// 00: No effect. \\n\n"
45606"/// 01: Negate the bit mask. \\n\n"
45607"/// 10: No effect. \\n\n"
45608"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45609"/// to the size of \\a A or \\a B. \\n\n"
45610"/// Bit [6]: Determines whether the index of the lowest set bit or the\n"
45611"/// highest set bit is returned. \\n\n"
45612"/// 0: The index of the least significant set bit. \\n\n"
45613"/// 1: The index of the most significant set bit. \\n\n"
45614"/// \\returns Returns an integer representing the result index of the comparison.\n"
45615"#define _mm_cmpestri(A, LA, B, LB, M) \\\n"
45616" (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
45617" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
45618" (int)(M))\n"
45619"\n"
45620"/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */\n"
45621"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45622"/// data with implicitly defined lengths that is contained in source operands\n"
45623"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
45624"/// string in \\a B is the maximum, otherwise, returns 0.\n"
45625"///\n"
45626"/// \\headerfile <x86intrin.h>\n"
45627"///\n"
45628"/// \\code\n"
45629"/// int _mm_cmpistra(__m128i A, __m128i B, const int M);\n"
45630"/// \\endcode\n"
45631"///\n"
45632"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45633"/// instruction.\n"
45634"///\n"
45635"/// \\param A\n"
45636"/// A 128-bit integer vector containing one of the source operands to be\n"
45637"/// compared.\n"
45638"/// \\param B\n"
45639"/// A 128-bit integer vector containing one of the source operands to be\n"
45640"/// compared.\n"
45641"/// \\param M\n"
45642"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45643"/// words and the type of comparison to perform. \\n\n"
45644"/// Bits [1:0]: Determine source data format. \\n\n"
45645"/// 00: 16 unsigned bytes \\n\n"
45646"/// 01: 8 unsigned words \\n\n"
45647"/// 10: 16 signed bytes \\n\n"
45648"/// 11: 8 signed words \\n\n"
45649"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45650"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45651"/// the characters in \\a A. \\n\n"
45652"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45653"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45654"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45655"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45656"/// \\a B for equality. \\n\n"
45657"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45658"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45659"/// mask of the comparison results. \\n\n"
45660"/// 00: No effect. \\n\n"
45661"/// 01: Negate the bit mask. \\n\n"
45662"/// 10: No effect. \\n\n"
45663"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45664"/// to the size of \\a A or \\a B. \\n\n"
45665"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
45666"/// \\a B is the maximum; otherwise, returns 0.\n"
45667"#define _mm_cmpistra(A, B, M) \\\n"
45668" (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \\\n"
45669" (__v16qi)(__m128i)(B), (int)(M))\n"
45670"\n"
45671"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45672"/// data with implicitly defined lengths that is contained in source operands\n"
45673"/// \\a A and \\a B. Returns 1 if the bit mask is non-zero, otherwise, returns\n"
45674"/// 0.\n"
45675"///\n"
45676"/// \\headerfile <x86intrin.h>\n"
45677"///\n"
45678"/// \\code\n"
45679"/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);\n"
45680"/// \\endcode\n"
45681"///\n"
45682"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45683"/// instruction.\n"
45684"///\n"
45685"/// \\param A\n"
45686"/// A 128-bit integer vector containing one of the source operands to be\n"
45687"/// compared.\n"
45688"/// \\param B\n"
45689"/// A 128-bit integer vector containing one of the source operands to be\n"
45690"/// compared.\n"
45691"/// \\param M\n"
45692"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45693"/// words and the type of comparison to perform. \\n\n"
45694"/// Bits [1:0]: Determine source data format. \\n\n"
45695"/// 00: 16 unsigned bytes \\n\n"
45696"/// 01: 8 unsigned words \\n\n"
45697"/// 10: 16 signed bytes \\n\n"
45698"/// 11: 8 signed words \\n\n"
45699"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45700"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45701"/// the characters in \\a A. \\n\n"
45702"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45703"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45704"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45705"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45706"/// \\a B for equality. \\n\n"
45707"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
45708"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45709"/// mask of the comparison results. \\n\n"
45710"/// 00: No effect. \\n\n"
45711"/// 01: Negate the bit mask. \\n\n"
45712"/// 10: No effect. \\n\n"
45713"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45714"/// to the size of \\a A or \\a B.\n"
45715"/// \\returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.\n"
45716"#define _mm_cmpistrc(A, B, M) \\\n"
45717" (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \\\n"
45718" (__v16qi)(__m128i)(B), (int)(M))\n"
45719"\n"
45720"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45721"/// data with implicitly defined lengths that is contained in source operands\n"
45722"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
45723"///\n"
45724"/// \\headerfile <x86intrin.h>\n"
45725"///\n"
45726"/// \\code\n"
45727"/// int _mm_cmpistro(__m128i A, __m128i B, const int M);\n"
45728"/// \\endcode\n"
45729"///\n"
45730"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45731"/// instruction.\n"
45732"///\n"
45733"/// \\param A\n"
45734"/// A 128-bit integer vector containing one of the source operands to be\n"
45735"/// compared.\n"
45736"/// \\param B\n"
45737"/// A 128-bit integer vector containing one of the source operands to be\n"
45738"/// compared.\n"
45739"/// \\param M\n"
45740"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45741"/// words and the type of comparison to perform. \\n\n"
45742"/// Bits [1:0]: Determine source data format. \\n\n"
45743"/// 00: 16 unsigned bytes \\n\n"
45744"/// 01: 8 unsigned words \\n\n"
45745"/// 10: 16 signed bytes \\n\n"
45746"/// 11: 8 signed words \\n\n"
45747"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45748"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45749"/// the characters in \\a A. \\n\n"
45750"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45751"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45752"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45753"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45754"/// \\a B for equality. \\n\n"
45755"/// 11: Substring: Search B for substring matches of \\a A. \\n\n"
45756"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45757"/// mask of the comparison results. \\n\n"
45758"/// 00: No effect. \\n\n"
45759"/// 01: Negate the bit mask. \\n\n"
45760"/// 10: No effect. \\n\n"
45761"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45762"/// to the size of \\a A or \\a B. \\n\n"
45763"/// \\returns Returns bit 0 of the resulting bit mask.\n"
45764"#define _mm_cmpistro(A, B, M) \\\n"
45765" (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \\\n"
45766" (__v16qi)(__m128i)(B), (int)(M))\n"
45767"\n"
45768"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45769"/// data with implicitly defined lengths that is contained in source operands\n"
45770"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
45771"/// the maximum, otherwise, returns 0.\n"
45772"///\n"
45773"/// \\headerfile <x86intrin.h>\n"
45774"///\n"
45775"/// \\code\n"
45776"/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);\n"
45777"/// \\endcode\n"
45778"///\n"
45779"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45780"/// instruction.\n"
45781"///\n"
45782"/// \\param A\n"
45783"/// A 128-bit integer vector containing one of the source operands to be\n"
45784"/// compared.\n"
45785"/// \\param B\n"
45786"/// A 128-bit integer vector containing one of the source operands to be\n"
45787"/// compared.\n"
45788"/// \\param M\n"
45789"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45790"/// words and the type of comparison to perform. \\n\n"
45791"/// Bits [1:0]: Determine source data format. \\n\n"
45792"/// 00: 16 unsigned bytes \\n\n"
45793"/// 01: 8 unsigned words \\n\n"
45794"/// 10: 16 signed bytes \\n\n"
45795"/// 11: 8 signed words \\n\n"
45796"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45797"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45798"/// the characters in \\a A. \\n\n"
45799"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45800"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45801"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45802"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45803"/// \\a B for equality. \\n\n"
45804"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45805"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45806"/// mask of the comparison results. \\n\n"
45807"/// 00: No effect. \\n\n"
45808"/// 01: Negate the bit mask. \\n\n"
45809"/// 10: No effect. \\n\n"
45810"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45811"/// to the size of \\a A or \\a B. \\n\n"
45812"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
45813"/// maximum, otherwise, returns 0.\n"
45814"#define _mm_cmpistrs(A, B, M) \\\n"
45815" (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \\\n"
45816" (__v16qi)(__m128i)(B), (int)(M))\n"
45817"\n"
45818"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45819"/// data with implicitly defined lengths that is contained in source operands\n"
45820"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
45821"/// the maximum, otherwise, returns 0.\n"
45822"///\n"
45823"/// \\headerfile <x86intrin.h>\n"
45824"///\n"
45825"/// \\code\n"
45826"/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);\n"
45827"/// \\endcode\n"
45828"///\n"
45829"/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n"
45830"/// instruction.\n"
45831"///\n"
45832"/// \\param A\n"
45833"/// A 128-bit integer vector containing one of the source operands to be\n"
45834"/// compared.\n"
45835"/// \\param B\n"
45836"/// A 128-bit integer vector containing one of the source operands to be\n"
45837"/// compared.\n"
45838"/// \\param M\n"
45839"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45840"/// words and the type of comparison to perform. \\n\n"
45841"/// Bits [1:0]: Determine source data format. \\n\n"
45842"/// 00: 16 unsigned bytes \\n\n"
45843"/// 01: 8 unsigned words \\n\n"
45844"/// 10: 16 signed bytes \\n\n"
45845"/// 11: 8 signed words \\n\n"
45846"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45847"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45848"/// the characters in \\a A. \\n\n"
45849"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45850"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45851"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45852"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45853"/// \\a B for equality. \\n\n"
45854"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45855"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45856"/// mask of the comparison results. \\n\n"
45857"/// 00: No effect. \\n\n"
45858"/// 01: Negate the bit mask. \\n\n"
45859"/// 10: No effect. \\n\n"
45860"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45861"/// to the size of \\a A or \\a B.\n"
45862"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
45863"/// maximum, otherwise, returns 0.\n"
45864"#define _mm_cmpistrz(A, B, M) \\\n"
45865" (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \\\n"
45866" (__v16qi)(__m128i)(B), (int)(M))\n"
45867"\n"
45868"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45869"/// data with explicitly defined lengths that is contained in source operands\n"
45870"/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n"
45871"/// string in \\a B is the maximum, otherwise, returns 0.\n"
45872"///\n"
45873"/// \\headerfile <x86intrin.h>\n"
45874"///\n"
45875"/// \\code\n"
45876"/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);\n"
45877"/// \\endcode\n"
45878"///\n"
45879"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
45880"/// instruction.\n"
45881"///\n"
45882"/// \\param A\n"
45883"/// A 128-bit integer vector containing one of the source operands to be\n"
45884"/// compared.\n"
45885"/// \\param LA\n"
45886"/// An integer that specifies the length of the string in \\a A.\n"
45887"/// \\param B\n"
45888"/// A 128-bit integer vector containing one of the source operands to be\n"
45889"/// compared.\n"
45890"/// \\param LB\n"
45891"/// An integer that specifies the length of the string in \\a B.\n"
45892"/// \\param M\n"
45893"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45894"/// words and the type of comparison to perform. \\n\n"
45895"/// Bits [1:0]: Determine source data format. \\n\n"
45896"/// 00: 16 unsigned bytes \\n\n"
45897"/// 01: 8 unsigned words \\n\n"
45898"/// 10: 16 signed bytes \\n\n"
45899"/// 11: 8 signed words \\n\n"
45900"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45901"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45902"/// the characters in \\a A. \\n\n"
45903"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45904"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45905"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45906"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45907"/// \\a B for equality. \\n\n"
45908"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45909"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45910"/// mask of the comparison results. \\n\n"
45911"/// 00: No effect. \\n\n"
45912"/// 01: Negate the bit mask. \\n\n"
45913"/// 10: No effect. \\n\n"
45914"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45915"/// to the size of \\a A or \\a B.\n"
45916"/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n"
45917"/// \\a B is the maximum, otherwise, returns 0.\n"
45918"#define _mm_cmpestra(A, LA, B, LB, M) \\\n"
45919" (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
45920" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
45921" (int)(M))\n"
45922"\n"
45923"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45924"/// data with explicitly defined lengths that is contained in source operands\n"
45925"/// \\a A and \\a B. Returns 1 if the resulting mask is non-zero, otherwise,\n"
45926"/// returns 0.\n"
45927"///\n"
45928"/// \\headerfile <x86intrin.h>\n"
45929"///\n"
45930"/// \\code\n"
45931"/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);\n"
45932"/// \\endcode\n"
45933"///\n"
45934"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
45935"/// instruction.\n"
45936"///\n"
45937"/// \\param A\n"
45938"/// A 128-bit integer vector containing one of the source operands to be\n"
45939"/// compared.\n"
45940"/// \\param LA\n"
45941"/// An integer that specifies the length of the string in \\a A.\n"
45942"/// \\param B\n"
45943"/// A 128-bit integer vector containing one of the source operands to be\n"
45944"/// compared.\n"
45945"/// \\param LB\n"
45946"/// An integer that specifies the length of the string in \\a B.\n"
45947"/// \\param M\n"
45948"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
45949"/// words and the type of comparison to perform. \\n\n"
45950"/// Bits [1:0]: Determine source data format. \\n\n"
45951"/// 00: 16 unsigned bytes \\n\n"
45952"/// 01: 8 unsigned words \\n\n"
45953"/// 10: 16 signed bytes \\n\n"
45954"/// 11: 8 signed words \\n\n"
45955"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
45956"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
45957"/// the characters in \\a A. \\n\n"
45958"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
45959"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
45960"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
45961"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
45962"/// \\a B for equality. \\n\n"
45963"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
45964"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
45965"/// mask of the comparison results. \\n\n"
45966"/// 00: No effect. \\n\n"
45967"/// 01: Negate the bit mask. \\n\n"
45968"/// 10: No effect. \\n\n"
45969"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
45970"/// to the size of \\a A or \\a B. \\n\n"
45971"/// \\returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.\n"
45972"#define _mm_cmpestrc(A, LA, B, LB, M) \\\n"
45973" (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
45974" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
45975" (int)(M))\n"
45976"\n"
45977"/// Uses the immediate operand \\a M to perform a comparison of string\n"
45978"/// data with explicitly defined lengths that is contained in source operands\n"
45979"/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n"
45980"///\n"
45981"/// \\headerfile <x86intrin.h>\n"
45982"///\n"
45983"/// \\code\n"
45984"/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);\n"
45985"/// \\endcode\n"
45986"///\n"
45987"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
45988"/// instruction.\n"
45989"///\n"
45990"/// \\param A\n"
45991"/// A 128-bit integer vector containing one of the source operands to be\n"
45992"/// compared.\n"
45993"/// \\param LA\n"
45994"/// An integer that specifies the length of the string in \\a A.\n"
45995"/// \\param B\n"
45996"/// A 128-bit integer vector containing one of the source operands to be\n"
45997"/// compared.\n"
45998"/// \\param LB\n"
45999"/// An integer that specifies the length of the string in \\a B.\n"
46000"/// \\param M\n"
46001"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
46002"/// words and the type of comparison to perform. \\n\n"
46003"/// Bits [1:0]: Determine source data format. \\n\n"
46004"/// 00: 16 unsigned bytes \\n\n"
46005"/// 01: 8 unsigned words \\n\n"
46006"/// 10: 16 signed bytes \\n\n"
46007"/// 11: 8 signed words \\n\n"
46008"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
46009"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
46010"/// the characters in \\a A. \\n\n"
46011"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
46012"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
46013"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
46014"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
46015"/// \\a B for equality. \\n\n"
46016"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
46017"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
46018"/// mask of the comparison results. \\n\n"
46019"/// 00: No effect. \\n\n"
46020"/// 01: Negate the bit mask. \\n\n"
46021"/// 10: No effect. \\n\n"
46022"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
46023"/// to the size of \\a A or \\a B.\n"
46024"/// \\returns Returns bit 0 of the resulting bit mask.\n"
46025"#define _mm_cmpestro(A, LA, B, LB, M) \\\n"
46026" (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
46027" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
46028" (int)(M))\n"
46029"\n"
46030"/// Uses the immediate operand \\a M to perform a comparison of string\n"
46031"/// data with explicitly defined lengths that is contained in source operands\n"
46032"/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n"
46033"/// the maximum, otherwise, returns 0.\n"
46034"///\n"
46035"/// \\headerfile <x86intrin.h>\n"
46036"///\n"
46037"/// \\code\n"
46038"/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);\n"
46039"/// \\endcode\n"
46040"///\n"
46041"/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n"
46042"/// instruction.\n"
46043"///\n"
46044"/// \\param A\n"
46045"/// A 128-bit integer vector containing one of the source operands to be\n"
46046"/// compared.\n"
46047"/// \\param LA\n"
46048"/// An integer that specifies the length of the string in \\a A.\n"
46049"/// \\param B\n"
46050"/// A 128-bit integer vector containing one of the source operands to be\n"
46051"/// compared.\n"
46052"/// \\param LB\n"
46053"/// An integer that specifies the length of the string in \\a B.\n"
46054"/// \\param M\n"
46055"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
46056"/// words and the type of comparison to perform. \\n\n"
46057"/// Bits [1:0]: Determine source data format. \\n\n"
46058"/// 00: 16 unsigned bytes \\n\n"
46059"/// 01: 8 unsigned words \\n\n"
46060"/// 10: 16 signed bytes \\n\n"
46061"/// 11: 8 signed words \\n\n"
46062"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
46063"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
46064"/// the characters in \\a A. \\n\n"
46065"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
46066"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
46067"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
46068"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
46069"/// \\a B for equality. \\n\n"
46070"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
46071"/// Bits [5:4]: Determine whether to perform a one's complement in the bit\n"
46072"/// mask of the comparison results. \\n\n"
46073"/// 00: No effect. \\n\n"
46074"/// 01: Negate the bit mask. \\n\n"
46075"/// 10: No effect. \\n\n"
46076"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
46077"/// to the size of \\a A or \\a B. \\n\n"
46078"/// \\returns Returns 1 if the length of the string in \\a A is less than the\n"
46079"/// maximum, otherwise, returns 0.\n"
46080"#define _mm_cmpestrs(A, LA, B, LB, M) \\\n"
46081" (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
46082" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
46083" (int)(M))\n"
46084"\n"
46085"/// Uses the immediate operand \\a M to perform a comparison of string\n"
46086"/// data with explicitly defined lengths that is contained in source operands\n"
46087"/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n"
46088"/// the maximum, otherwise, returns 0.\n"
46089"///\n"
46090"/// \\headerfile <x86intrin.h>\n"
46091"///\n"
46092"/// \\code\n"
46093"/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);\n"
46094"/// \\endcode\n"
46095"///\n"
46096"/// This intrinsic corresponds to the <c> VPCMPESTRI </c> instruction.\n"
46097"///\n"
46098"/// \\param A\n"
46099"/// A 128-bit integer vector containing one of the source operands to be\n"
46100"/// compared.\n"
46101"/// \\param LA\n"
46102"/// An integer that specifies the length of the string in \\a A.\n"
46103"/// \\param B\n"
46104"/// A 128-bit integer vector containing one of the source operands to be\n"
46105"/// compared.\n"
46106"/// \\param LB\n"
46107"/// An integer that specifies the length of the string in \\a B.\n"
46108"/// \\param M\n"
46109"/// An 8-bit immediate operand specifying whether the characters are bytes or\n"
46110"/// words and the type of comparison to perform. \\n\n"
46111"/// Bits [1:0]: Determine source data format. \\n\n"
46112"/// 00: 16 unsigned bytes \\n\n"
46113"/// 01: 8 unsigned words \\n\n"
46114"/// 10: 16 signed bytes \\n\n"
46115"/// 11: 8 signed words \\n\n"
46116"/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n"
46117"/// 00: Subset: Each character in \\a B is compared for equality with all\n"
46118"/// the characters in \\a A. \\n\n"
46119"/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n"
46120"/// basis is greater than or equal for even-indexed elements in \\a A,\n"
46121"/// and less than or equal for odd-indexed elements in \\a A. \\n\n"
46122"/// 10: Match: Compare each pair of corresponding characters in \\a A and\n"
46123"/// \\a B for equality. \\n\n"
46124"/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n"
46125"/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n"
46126"/// mask of the comparison results. \\n\n"
46127"/// 00: No effect. \\n\n"
46128"/// 01: Negate the bit mask. \\n\n"
46129"/// 10: No effect. \\n\n"
46130"/// 11: Negate the bit mask only for bits with an index less than or equal\n"
46131"/// to the size of \\a A or \\a B.\n"
46132"/// \\returns Returns 1 if the length of the string in \\a B is less than the\n"
46133"/// maximum, otherwise, returns 0.\n"
46134"#define _mm_cmpestrz(A, LA, B, LB, M) \\\n"
46135" (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \\\n"
46136" (__v16qi)(__m128i)(B), (int)(LB), \\\n"
46137" (int)(M))\n"
46138"\n"
46139"/* SSE4.2 Compare Packed Data -- Greater Than. */\n"
46140"/// Compares each of the corresponding 64-bit values of the 128-bit\n"
46141"/// integer vectors to determine if the values in the first operand are\n"
46142"/// greater than those in the second operand.\n"
46143"///\n"
46144"/// \\headerfile <x86intrin.h>\n"
46145"///\n"
46146"/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.\n"
46147"///\n"
46148"/// \\param __V1\n"
46149"/// A 128-bit integer vector.\n"
46150"/// \\param __V2\n"
46151"/// A 128-bit integer vector.\n"
46152"/// \\returns A 128-bit integer vector containing the comparison results.\n"
46153"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
46154"_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)\n"
46155"{\n"
46156" return (__m128i)((__v2di)__V1 > (__v2di)__V2);\n"
46157"}\n"
46158"\n"
46159"/* SSE4.2 Accumulate CRC32. */\n"
46160"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
46161"/// unsigned char operand.\n"
46162"///\n"
46163"/// \\headerfile <x86intrin.h>\n"
46164"///\n"
46165"/// This intrinsic corresponds to the <c> CRC32B </c> instruction.\n"
46166"///\n"
46167"/// \\param __C\n"
46168"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
46169"/// \\a __D.\n"
46170"/// \\param __D\n"
46171"/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.\n"
46172"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
46173"/// operand \\a __D.\n"
46174"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
46175"_mm_crc32_u8(unsigned int __C, unsigned char __D)\n"
46176"{\n"
46177" return __builtin_ia32_crc32qi(__C, __D);\n"
46178"}\n"
46179"\n"
46180"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
46181"/// unsigned short operand.\n"
46182"///\n"
46183"/// \\headerfile <x86intrin.h>\n"
46184"///\n"
46185"/// This intrinsic corresponds to the <c> CRC32W </c> instruction.\n"
46186"///\n"
46187"/// \\param __C\n"
46188"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
46189"/// \\a __D.\n"
46190"/// \\param __D\n"
46191"/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.\n"
46192"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
46193"/// operand \\a __D.\n"
46194"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
46195"_mm_crc32_u16(unsigned int __C, unsigned short __D)\n"
46196"{\n"
46197" return __builtin_ia32_crc32hi(__C, __D);\n"
46198"}\n"
46199"\n"
46200"/// Adds the first unsigned integer operand to the CRC-32C checksum of\n"
46201"/// the second unsigned integer operand.\n"
46202"///\n"
46203"/// \\headerfile <x86intrin.h>\n"
46204"///\n"
46205"/// This intrinsic corresponds to the <c> CRC32L </c> instruction.\n"
46206"///\n"
46207"/// \\param __C\n"
46208"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
46209"/// \\a __D.\n"
46210"/// \\param __D\n"
46211"/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.\n"
46212"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
46213"/// operand \\a __D.\n"
46214"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
46215"_mm_crc32_u32(unsigned int __C, unsigned int __D)\n"
46216"{\n"
46217" return __builtin_ia32_crc32si(__C, __D);\n"
46218"}\n"
46219"\n"
46220"#ifdef __x86_64__\n"
46221"/// Adds the unsigned integer operand to the CRC-32C checksum of the\n"
46222"/// unsigned 64-bit integer operand.\n"
46223"///\n"
46224"/// \\headerfile <x86intrin.h>\n"
46225"///\n"
46226"/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.\n"
46227"///\n"
46228"/// \\param __C\n"
46229"/// An unsigned integer operand to add to the CRC-32C checksum of operand\n"
46230"/// \\a __D.\n"
46231"/// \\param __D\n"
46232"/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.\n"
46233"/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n"
46234"/// operand \\a __D.\n"
46235"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
46236"_mm_crc32_u64(unsigned long long __C, unsigned long long __D)\n"
46237"{\n"
46238" return __builtin_ia32_crc32di(__C, __D);\n"
46239"}\n"
46240"#endif /* __x86_64__ */\n"
46241"\n"
46242"#undef __DEFAULT_FN_ATTRS\n"
46243"\n"
46244"#include <popcntintrin.h>\n"
46245"\n"
46246"#endif /* __SMMINTRIN_H */\n"
46247"" } ,
46248 { "/builtins/stdalign.h" , "/*===---- stdalign.h - Standard header for alignment ------------------------===\n"
46249" *\n"
46250" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46251" * of this software and associated documentation files (the \"Software\"), to deal\n"
46252" * in the Software without restriction, including without limitation the rights\n"
46253" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46254" * copies of the Software, and to permit persons to whom the Software is\n"
46255" * furnished to do so, subject to the following conditions:\n"
46256" *\n"
46257" * The above copyright notice and this permission notice shall be included in\n"
46258" * all copies or substantial portions of the Software.\n"
46259" *\n"
46260" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46261" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46262" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46263" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46264" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46265" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46266" * THE SOFTWARE.\n"
46267" *\n"
46268" *===-----------------------------------------------------------------------===\n"
46269" */\n"
46270"\n"
46271"#ifndef __STDALIGN_H\n"
46272"#define __STDALIGN_H\n"
46273"\n"
46274"#ifndef __cplusplus\n"
46275"#define alignas _Alignas\n"
46276"#define alignof _Alignof\n"
46277"#endif\n"
46278"\n"
46279"#define __alignas_is_defined 1\n"
46280"#define __alignof_is_defined 1\n"
46281"\n"
46282"#endif /* __STDALIGN_H */\n"
46283"" } ,
46284 { "/builtins/stdarg.h" , "/*===---- stdarg.h - Variable argument handling ----------------------------===\n"
46285" *\n"
46286" * Copyright (c) 2008 Eli Friedman\n"
46287" *\n"
46288" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46289" * of this software and associated documentation files (the \"Software\"), to deal\n"
46290" * in the Software without restriction, including without limitation the rights\n"
46291" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46292" * copies of the Software, and to permit persons to whom the Software is\n"
46293" * furnished to do so, subject to the following conditions:\n"
46294" *\n"
46295" * The above copyright notice and this permission notice shall be included in\n"
46296" * all copies or substantial portions of the Software.\n"
46297" *\n"
46298" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46299" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46300" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46301" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46302" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46303" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46304" * THE SOFTWARE.\n"
46305" *\n"
46306" *===-----------------------------------------------------------------------===\n"
46307" */\n"
46308"\n"
46309"#ifndef __STDARG_H\n"
46310"#define __STDARG_H\n"
46311"\n"
46312"#ifndef _VA_LIST\n"
46313"typedef __builtin_va_list va_list;\n"
46314"#define _VA_LIST\n"
46315"#endif\n"
46316"#define va_start(ap, param) __builtin_va_start(ap, param)\n"
46317"#define va_end(ap) __builtin_va_end(ap)\n"
46318"#define va_arg(ap, type) __builtin_va_arg(ap, type)\n"
46319"\n"
46320"/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode\n"
46321" * or -ansi is not specified, since it was not part of C90.\n"
46322" */\n"
46323"#define __va_copy(d,s) __builtin_va_copy(d,s)\n"
46324"\n"
46325"#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)\n"
46326"#define va_copy(dest, src) __builtin_va_copy(dest, src)\n"
46327"#endif\n"
46328"\n"
46329"#ifndef __GNUC_VA_LIST\n"
46330"#define __GNUC_VA_LIST 1\n"
46331"typedef __builtin_va_list __gnuc_va_list;\n"
46332"#endif\n"
46333"\n"
46334"#endif /* __STDARG_H */\n"
46335"" } ,
46336 { "/builtins/stdatomic.h" , "/*===---- stdatomic.h - Standard header for atomic types and operations -----===\n"
46337" *\n"
46338" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46339" * of this software and associated documentation files (the \"Software\"), to deal\n"
46340" * in the Software without restriction, including without limitation the rights\n"
46341" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46342" * copies of the Software, and to permit persons to whom the Software is\n"
46343" * furnished to do so, subject to the following conditions:\n"
46344" *\n"
46345" * The above copyright notice and this permission notice shall be included in\n"
46346" * all copies or substantial portions of the Software.\n"
46347" *\n"
46348" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46349" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46350" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46351" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46352" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46353" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46354" * THE SOFTWARE.\n"
46355" *\n"
46356" *===-----------------------------------------------------------------------===\n"
46357" */\n"
46358"\n"
46359"#ifndef __CLANG_STDATOMIC_H\n"
46360"#define __CLANG_STDATOMIC_H\n"
46361"\n"
46362"/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for\n"
46363" * example, already has a Clang-compatible stdatomic.h header.\n"
46364" */\n"
46365"#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)\n"
46366"# include_next <stdatomic.h>\n"
46367"#else\n"
46368"\n"
46369"#include <stddef.h>\n"
46370"#include <stdint.h>\n"
46371"\n"
46372"#ifdef __cplusplus\n"
46373"extern \"C\" {\n"
46374"#endif\n"
46375"\n"
46376"/* 7.17.1 Introduction */\n"
46377"\n"
46378"#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE\n"
46379"#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE\n"
46380"#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE\n"
46381"#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE\n"
46382"#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE\n"
46383"#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE\n"
46384"#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE\n"
46385"#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE\n"
46386"#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE\n"
46387"#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE\n"
46388"\n"
46389"/* 7.17.2 Initialization */\n"
46390"\n"
46391"#define ATOMIC_VAR_INIT(value) (value)\n"
46392"#define atomic_init __c11_atomic_init\n"
46393"\n"
46394"/* 7.17.3 Order and consistency */\n"
46395"\n"
46396"typedef enum memory_order {\n"
46397" memory_order_relaxed = __ATOMIC_RELAXED,\n"
46398" memory_order_consume = __ATOMIC_CONSUME,\n"
46399" memory_order_acquire = __ATOMIC_ACQUIRE,\n"
46400" memory_order_release = __ATOMIC_RELEASE,\n"
46401" memory_order_acq_rel = __ATOMIC_ACQ_REL,\n"
46402" memory_order_seq_cst = __ATOMIC_SEQ_CST\n"
46403"} memory_order;\n"
46404"\n"
46405"#define kill_dependency(y) (y)\n"
46406"\n"
46407"/* 7.17.4 Fences */\n"
46408"\n"
46409"/* These should be provided by the libc implementation. */\n"
46410"void atomic_thread_fence(memory_order);\n"
46411"void atomic_signal_fence(memory_order);\n"
46412"\n"
46413"#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)\n"
46414"#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)\n"
46415"\n"
46416"/* 7.17.5 Lock-free property */\n"
46417"\n"
46418"#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))\n"
46419"\n"
46420"/* 7.17.6 Atomic integer types */\n"
46421"\n"
46422"#ifdef __cplusplus\n"
46423"typedef _Atomic(bool) atomic_bool;\n"
46424"#else\n"
46425"typedef _Atomic(_Bool) atomic_bool;\n"
46426"#endif\n"
46427"typedef _Atomic(char) atomic_char;\n"
46428"typedef _Atomic(signed char) atomic_schar;\n"
46429"typedef _Atomic(unsigned char) atomic_uchar;\n"
46430"typedef _Atomic(short) atomic_short;\n"
46431"typedef _Atomic(unsigned short) atomic_ushort;\n"
46432"typedef _Atomic(int) atomic_int;\n"
46433"typedef _Atomic(unsigned int) atomic_uint;\n"
46434"typedef _Atomic(long) atomic_long;\n"
46435"typedef _Atomic(unsigned long) atomic_ulong;\n"
46436"typedef _Atomic(long long) atomic_llong;\n"
46437"typedef _Atomic(unsigned long long) atomic_ullong;\n"
46438"typedef _Atomic(uint_least16_t) atomic_char16_t;\n"
46439"typedef _Atomic(uint_least32_t) atomic_char32_t;\n"
46440"typedef _Atomic(wchar_t) atomic_wchar_t;\n"
46441"typedef _Atomic(int_least8_t) atomic_int_least8_t;\n"
46442"typedef _Atomic(uint_least8_t) atomic_uint_least8_t;\n"
46443"typedef _Atomic(int_least16_t) atomic_int_least16_t;\n"
46444"typedef _Atomic(uint_least16_t) atomic_uint_least16_t;\n"
46445"typedef _Atomic(int_least32_t) atomic_int_least32_t;\n"
46446"typedef _Atomic(uint_least32_t) atomic_uint_least32_t;\n"
46447"typedef _Atomic(int_least64_t) atomic_int_least64_t;\n"
46448"typedef _Atomic(uint_least64_t) atomic_uint_least64_t;\n"
46449"typedef _Atomic(int_fast8_t) atomic_int_fast8_t;\n"
46450"typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;\n"
46451"typedef _Atomic(int_fast16_t) atomic_int_fast16_t;\n"
46452"typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;\n"
46453"typedef _Atomic(int_fast32_t) atomic_int_fast32_t;\n"
46454"typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;\n"
46455"typedef _Atomic(int_fast64_t) atomic_int_fast64_t;\n"
46456"typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;\n"
46457"typedef _Atomic(intptr_t) atomic_intptr_t;\n"
46458"typedef _Atomic(uintptr_t) atomic_uintptr_t;\n"
46459"typedef _Atomic(size_t) atomic_size_t;\n"
46460"typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;\n"
46461"typedef _Atomic(intmax_t) atomic_intmax_t;\n"
46462"typedef _Atomic(uintmax_t) atomic_uintmax_t;\n"
46463"\n"
46464"/* 7.17.7 Operations on atomic types */\n"
46465"\n"
46466"#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)\n"
46467"#define atomic_store_explicit __c11_atomic_store\n"
46468"\n"
46469"#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)\n"
46470"#define atomic_load_explicit __c11_atomic_load\n"
46471"\n"
46472"#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)\n"
46473"#define atomic_exchange_explicit __c11_atomic_exchange\n"
46474"\n"
46475"#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
46476"#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong\n"
46477"\n"
46478"#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n"
46479"#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak\n"
46480"\n"
46481"#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)\n"
46482"#define atomic_fetch_add_explicit __c11_atomic_fetch_add\n"
46483"\n"
46484"#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)\n"
46485"#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub\n"
46486"\n"
46487"#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)\n"
46488"#define atomic_fetch_or_explicit __c11_atomic_fetch_or\n"
46489"\n"
46490"#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)\n"
46491"#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor\n"
46492"\n"
46493"#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)\n"
46494"#define atomic_fetch_and_explicit __c11_atomic_fetch_and\n"
46495"\n"
46496"/* 7.17.8 Atomic flag type and operations */\n"
46497"\n"
46498"typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;\n"
46499"\n"
46500"#define ATOMIC_FLAG_INIT { 0 }\n"
46501"\n"
46502"/* These should be provided by the libc implementation. */\n"
46503"#ifdef __cplusplus\n"
46504"bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
46505"bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
46506"#else\n"
46507"_Bool atomic_flag_test_and_set(volatile atomic_flag *);\n"
46508"_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n"
46509"#endif\n"
46510"void atomic_flag_clear(volatile atomic_flag *);\n"
46511"void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);\n"
46512"\n"
46513"#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)\n"
46514"#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)\n"
46515"\n"
46516"#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)\n"
46517"#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)\n"
46518"\n"
46519"#ifdef __cplusplus\n"
46520"}\n"
46521"#endif\n"
46522"\n"
46523"#endif /* __STDC_HOSTED__ */\n"
46524"#endif /* __CLANG_STDATOMIC_H */\n"
46525"\n"
46526"" } ,
46527 { "/builtins/stdbool.h" , "/*===---- stdbool.h - Standard header for booleans -------------------------===\n"
46528" *\n"
46529" * Copyright (c) 2008 Eli Friedman\n"
46530" *\n"
46531" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46532" * of this software and associated documentation files (the \"Software\"), to deal\n"
46533" * in the Software without restriction, including without limitation the rights\n"
46534" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46535" * copies of the Software, and to permit persons to whom the Software is\n"
46536" * furnished to do so, subject to the following conditions:\n"
46537" *\n"
46538" * The above copyright notice and this permission notice shall be included in\n"
46539" * all copies or substantial portions of the Software.\n"
46540" *\n"
46541" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46542" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46543" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46544" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46545" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46546" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46547" * THE SOFTWARE.\n"
46548" *\n"
46549" *===-----------------------------------------------------------------------===\n"
46550" */\n"
46551"\n"
46552"#ifndef __STDBOOL_H\n"
46553"#define __STDBOOL_H\n"
46554"\n"
46555"/* Don't define bool, true, and false in C++, except as a GNU extension. */\n"
46556"#ifndef __cplusplus\n"
46557"#define bool _Bool\n"
46558"#define true 1\n"
46559"#define false 0\n"
46560"#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)\n"
46561"/* Define _Bool as a GNU extension. */\n"
46562"#define _Bool bool\n"
46563"#if __cplusplus < 201103L\n"
46564"/* For C++98, define bool, false, true as a GNU extension. */\n"
46565"#define bool bool\n"
46566"#define false false\n"
46567"#define true true\n"
46568"#endif\n"
46569"#endif\n"
46570"\n"
46571"#define __bool_true_false_are_defined 1\n"
46572"\n"
46573"#endif /* __STDBOOL_H */\n"
46574"" } ,
46575 { "/builtins/stddef.h" , "/*===---- stddef.h - Basic type definitions --------------------------------===\n"
46576" *\n"
46577" * Copyright (c) 2008 Eli Friedman\n"
46578" *\n"
46579" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46580" * of this software and associated documentation files (the \"Software\"), to deal\n"
46581" * in the Software without restriction, including without limitation the rights\n"
46582" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46583" * copies of the Software, and to permit persons to whom the Software is\n"
46584" * furnished to do so, subject to the following conditions:\n"
46585" *\n"
46586" * The above copyright notice and this permission notice shall be included in\n"
46587" * all copies or substantial portions of the Software.\n"
46588" *\n"
46589" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46590" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46591" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46592" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46593" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46594" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46595" * THE SOFTWARE.\n"
46596" *\n"
46597" *===-----------------------------------------------------------------------===\n"
46598" */\n"
46599"\n"
46600"#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \\\n"
46601" defined(__need_size_t) || defined(__need_wchar_t) || \\\n"
46602" defined(__need_NULL) || defined(__need_wint_t)\n"
46603"\n"
46604"#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \\\n"
46605" !defined(__need_wchar_t) && !defined(__need_NULL) && \\\n"
46606" !defined(__need_wint_t)\n"
46607"/* Always define miscellaneous pieces when modules are available. */\n"
46608"#if !__has_feature(modules)\n"
46609"#define __STDDEF_H\n"
46610"#endif\n"
46611"#define __need_ptrdiff_t\n"
46612"#define __need_size_t\n"
46613"#define __need_wchar_t\n"
46614"#define __need_NULL\n"
46615"#define __need_STDDEF_H_misc\n"
46616"/* __need_wint_t is intentionally not defined here. */\n"
46617"#endif\n"
46618"\n"
46619"#if defined(__need_ptrdiff_t)\n"
46620"#if !defined(_PTRDIFF_T) || __has_feature(modules)\n"
46621"/* Always define ptrdiff_t when modules are available. */\n"
46622"#if !__has_feature(modules)\n"
46623"#define _PTRDIFF_T\n"
46624"#endif\n"
46625"typedef __PTRDIFF_TYPE__ ptrdiff_t;\n"
46626"#endif\n"
46627"#undef __need_ptrdiff_t\n"
46628"#endif /* defined(__need_ptrdiff_t) */\n"
46629"\n"
46630"#if defined(__need_size_t)\n"
46631"#if !defined(_SIZE_T) || __has_feature(modules)\n"
46632"/* Always define size_t when modules are available. */\n"
46633"#if !__has_feature(modules)\n"
46634"#define _SIZE_T\n"
46635"#endif\n"
46636"typedef __SIZE_TYPE__ size_t;\n"
46637"#endif\n"
46638"#undef __need_size_t\n"
46639"#endif /*defined(__need_size_t) */\n"
46640"\n"
46641"#if defined(__need_STDDEF_H_misc)\n"
46642"/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is\n"
46643" * enabled. */\n"
46644"#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \\\n"
46645" !defined(_RSIZE_T)) || __has_feature(modules)\n"
46646"/* Always define rsize_t when modules are available. */\n"
46647"#if !__has_feature(modules)\n"
46648"#define _RSIZE_T\n"
46649"#endif\n"
46650"typedef __SIZE_TYPE__ rsize_t;\n"
46651"#endif\n"
46652"#endif /* defined(__need_STDDEF_H_misc) */\n"
46653"\n"
46654"#if defined(__need_wchar_t)\n"
46655"#ifndef __cplusplus\n"
46656"/* Always define wchar_t when modules are available. */\n"
46657"#if !defined(_WCHAR_T) || __has_feature(modules)\n"
46658"#if !__has_feature(modules)\n"
46659"#define _WCHAR_T\n"
46660"#if defined(_MSC_EXTENSIONS)\n"
46661"#define _WCHAR_T_DEFINED\n"
46662"#endif\n"
46663"#endif\n"
46664"typedef __WCHAR_TYPE__ wchar_t;\n"
46665"#endif\n"
46666"#endif\n"
46667"#undef __need_wchar_t\n"
46668"#endif /* defined(__need_wchar_t) */\n"
46669"\n"
46670"#if defined(__need_NULL)\n"
46671"#undef NULL\n"
46672"#ifdef __cplusplus\n"
46673"# if !defined(__MINGW32__) && !defined(_MSC_VER)\n"
46674"# define NULL __null\n"
46675"# else\n"
46676"# define NULL 0\n"
46677"# endif\n"
46678"#else\n"
46679"# define NULL ((void*)0)\n"
46680"#endif\n"
46681"#ifdef __cplusplus\n"
46682"#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)\n"
46683"namespace std { typedef decltype(nullptr) nullptr_t; }\n"
46684"using ::std::nullptr_t;\n"
46685"#endif\n"
46686"#endif\n"
46687"#undef __need_NULL\n"
46688"#endif /* defined(__need_NULL) */\n"
46689"\n"
46690"#if defined(__need_STDDEF_H_misc)\n"
46691"#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L\n"
46692"#include \"__stddef_max_align_t.h\"\n"
46693"#endif\n"
46694"#define offsetof(t, d) __builtin_offsetof(t, d)\n"
46695"#undef __need_STDDEF_H_misc\n"
46696"#endif /* defined(__need_STDDEF_H_misc) */\n"
46697"\n"
46698"/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use\n"
46699"__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */\n"
46700"#if defined(__need_wint_t)\n"
46701"/* Always define wint_t when modules are available. */\n"
46702"#if !defined(_WINT_T) || __has_feature(modules)\n"
46703"#if !__has_feature(modules)\n"
46704"#define _WINT_T\n"
46705"#endif\n"
46706"typedef __WINT_TYPE__ wint_t;\n"
46707"#endif\n"
46708"#undef __need_wint_t\n"
46709"#endif /* __need_wint_t */\n"
46710"\n"
46711"#endif\n"
46712"" } ,
46713 { "/builtins/stdint.h" , "/*===---- stdint.h - Standard header for sized integer types --------------===*\\\n"
46714" *\n"
46715" * Copyright (c) 2009 Chris Lattner\n"
46716" *\n"
46717" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
46718" * of this software and associated documentation files (the \"Software\"), to deal\n"
46719" * in the Software without restriction, including without limitation the rights\n"
46720" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
46721" * copies of the Software, and to permit persons to whom the Software is\n"
46722" * furnished to do so, subject to the following conditions:\n"
46723" *\n"
46724" * The above copyright notice and this permission notice shall be included in\n"
46725" * all copies or substantial portions of the Software.\n"
46726" *\n"
46727" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
46728" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
46729" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
46730" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
46731" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
46732" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
46733" * THE SOFTWARE.\n"
46734" *\n"
46735"\\*===----------------------------------------------------------------------===*/\n"
46736"\n"
46737"\n"
46738"/* If we're hosted, fall back to the system's stdint.h, which might have\n"
46739" * additional definitions.\n"
46740" */\n"
46741"#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)\n"
46742"\n"
46743"// C99 7.18.3 Limits of other integer types\n"
46744"//\n"
46745"// Footnote 219, 220: C++ implementations should define these macros only when\n"
46746"// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.\n"
46747"//\n"
46748"// Footnote 222: C++ implementations should define these macros only when\n"
46749"// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.\n"
46750"//\n"
46751"// C++11 [cstdint.syn]p2:\n"
46752"//\n"
46753"// The macros defined by <cstdint> are provided unconditionally. In particular,\n"
46754"// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in\n"
46755"// footnotes 219, 220, and 222 in the C standard) play no role in C++.\n"
46756"//\n"
46757"// C11 removed the problematic footnotes.\n"
46758"//\n"
46759"// Work around this inconsistency by always defining those macros in C++ mode,\n"
46760"// so that a C library implementation which follows the C99 standard can be\n"
46761"// used in C++.\n"
46762"# ifdef __cplusplus\n"
46763"# if !defined(__STDC_LIMIT_MACROS)\n"
46764"# define __STDC_LIMIT_MACROS\n"
46765"# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
46766"# endif\n"
46767"# if !defined(__STDC_CONSTANT_MACROS)\n"
46768"# define __STDC_CONSTANT_MACROS\n"
46769"# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
46770"# endif\n"
46771"# endif\n"
46772"\n"
46773"# include_next <stdint.h>\n"
46774"\n"
46775"# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
46776"# undef __STDC_LIMIT_MACROS\n"
46777"# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n"
46778"# endif\n"
46779"# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
46780"# undef __STDC_CONSTANT_MACROS\n"
46781"# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n"
46782"# endif\n"
46783"\n"
46784"#else\n"
46785"#ifndef __CLANG_STDINT_H2\n"
46786"#define __CLANG_STDINT_H2\n"
46787"\n"
46788"/* C99 7.18.1.1 Exact-width integer types.\n"
46789" * C99 7.18.1.2 Minimum-width integer types.\n"
46790" * C99 7.18.1.3 Fastest minimum-width integer types.\n"
46791" *\n"
46792" * The standard requires that exact-width type be defined for 8-, 16-, 32-, and\n"
46793" * 64-bit types if they are implemented. Other exact width types are optional.\n"
46794" * This implementation defines an exact-width types for every integer width\n"
46795" * that is represented in the standard integer types.\n"
46796" *\n"
46797" * The standard also requires minimum-width types be defined for 8-, 16-, 32-,\n"
46798" * and 64-bit widths regardless of whether there are corresponding exact-width\n"
46799" * types.\n"
46800" *\n"
46801" * To accommodate targets that are missing types that are exactly 8, 16, 32, or\n"
46802" * 64 bits wide, this implementation takes an approach of cascading\n"
46803" * redefinitions, redefining __int_leastN_t to successively smaller exact-width\n"
46804" * types. It is therefore important that the types are defined in order of\n"
46805" * descending widths.\n"
46806" *\n"
46807" * We currently assume that the minimum-width types and the fastest\n"
46808" * minimum-width types are the same. This is allowed by the standard, but is\n"
46809" * suboptimal.\n"
46810" *\n"
46811" * In violation of the standard, some targets do not implement a type that is\n"
46812" * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).\n"
46813" * To accommodate these targets, a required minimum-width type is only\n"
46814" * defined if there exists an exact-width type of equal or greater width.\n"
46815" */\n"
46816"\n"
46817"#ifdef __INT64_TYPE__\n"
46818"# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/\n"
46819"typedef __INT64_TYPE__ int64_t;\n"
46820"# endif /* __int8_t_defined */\n"
46821"typedef __UINT64_TYPE__ uint64_t;\n"
46822"# define __int_least64_t int64_t\n"
46823"# define __uint_least64_t uint64_t\n"
46824"# define __int_least32_t int64_t\n"
46825"# define __uint_least32_t uint64_t\n"
46826"# define __int_least16_t int64_t\n"
46827"# define __uint_least16_t uint64_t\n"
46828"# define __int_least8_t int64_t\n"
46829"# define __uint_least8_t uint64_t\n"
46830"#endif /* __INT64_TYPE__ */\n"
46831"\n"
46832"#ifdef __int_least64_t\n"
46833"typedef __int_least64_t int_least64_t;\n"
46834"typedef __uint_least64_t uint_least64_t;\n"
46835"typedef __int_least64_t int_fast64_t;\n"
46836"typedef __uint_least64_t uint_fast64_t;\n"
46837"#endif /* __int_least64_t */\n"
46838"\n"
46839"#ifdef __INT56_TYPE__\n"
46840"typedef __INT56_TYPE__ int56_t;\n"
46841"typedef __UINT56_TYPE__ uint56_t;\n"
46842"typedef int56_t int_least56_t;\n"
46843"typedef uint56_t uint_least56_t;\n"
46844"typedef int56_t int_fast56_t;\n"
46845"typedef uint56_t uint_fast56_t;\n"
46846"# define __int_least32_t int56_t\n"
46847"# define __uint_least32_t uint56_t\n"
46848"# define __int_least16_t int56_t\n"
46849"# define __uint_least16_t uint56_t\n"
46850"# define __int_least8_t int56_t\n"
46851"# define __uint_least8_t uint56_t\n"
46852"#endif /* __INT56_TYPE__ */\n"
46853"\n"
46854"\n"
46855"#ifdef __INT48_TYPE__\n"
46856"typedef __INT48_TYPE__ int48_t;\n"
46857"typedef __UINT48_TYPE__ uint48_t;\n"
46858"typedef int48_t int_least48_t;\n"
46859"typedef uint48_t uint_least48_t;\n"
46860"typedef int48_t int_fast48_t;\n"
46861"typedef uint48_t uint_fast48_t;\n"
46862"# define __int_least32_t int48_t\n"
46863"# define __uint_least32_t uint48_t\n"
46864"# define __int_least16_t int48_t\n"
46865"# define __uint_least16_t uint48_t\n"
46866"# define __int_least8_t int48_t\n"
46867"# define __uint_least8_t uint48_t\n"
46868"#endif /* __INT48_TYPE__ */\n"
46869"\n"
46870"\n"
46871"#ifdef __INT40_TYPE__\n"
46872"typedef __INT40_TYPE__ int40_t;\n"
46873"typedef __UINT40_TYPE__ uint40_t;\n"
46874"typedef int40_t int_least40_t;\n"
46875"typedef uint40_t uint_least40_t;\n"
46876"typedef int40_t int_fast40_t;\n"
46877"typedef uint40_t uint_fast40_t;\n"
46878"# define __int_least32_t int40_t\n"
46879"# define __uint_least32_t uint40_t\n"
46880"# define __int_least16_t int40_t\n"
46881"# define __uint_least16_t uint40_t\n"
46882"# define __int_least8_t int40_t\n"
46883"# define __uint_least8_t uint40_t\n"
46884"#endif /* __INT40_TYPE__ */\n"
46885"\n"
46886"\n"
46887"#ifdef __INT32_TYPE__\n"
46888"\n"
46889"# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/\n"
46890"typedef __INT32_TYPE__ int32_t;\n"
46891"# endif /* __int8_t_defined */\n"
46892"\n"
46893"# ifndef __uint32_t_defined /* more glibc compatibility */\n"
46894"# define __uint32_t_defined\n"
46895"typedef __UINT32_TYPE__ uint32_t;\n"
46896"# endif /* __uint32_t_defined */\n"
46897"\n"
46898"# define __int_least32_t int32_t\n"
46899"# define __uint_least32_t uint32_t\n"
46900"# define __int_least16_t int32_t\n"
46901"# define __uint_least16_t uint32_t\n"
46902"# define __int_least8_t int32_t\n"
46903"# define __uint_least8_t uint32_t\n"
46904"#endif /* __INT32_TYPE__ */\n"
46905"\n"
46906"#ifdef __int_least32_t\n"
46907"typedef __int_least32_t int_least32_t;\n"
46908"typedef __uint_least32_t uint_least32_t;\n"
46909"typedef __int_least32_t int_fast32_t;\n"
46910"typedef __uint_least32_t uint_fast32_t;\n"
46911"#endif /* __int_least32_t */\n"
46912"\n"
46913"#ifdef __INT24_TYPE__\n"
46914"typedef __INT24_TYPE__ int24_t;\n"
46915"typedef __UINT24_TYPE__ uint24_t;\n"
46916"typedef int24_t int_least24_t;\n"
46917"typedef uint24_t uint_least24_t;\n"
46918"typedef int24_t int_fast24_t;\n"
46919"typedef uint24_t uint_fast24_t;\n"
46920"# define __int_least16_t int24_t\n"
46921"# define __uint_least16_t uint24_t\n"
46922"# define __int_least8_t int24_t\n"
46923"# define __uint_least8_t uint24_t\n"
46924"#endif /* __INT24_TYPE__ */\n"
46925"\n"
46926"#ifdef __INT16_TYPE__\n"
46927"#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/\n"
46928"typedef __INT16_TYPE__ int16_t;\n"
46929"#endif /* __int8_t_defined */\n"
46930"typedef __UINT16_TYPE__ uint16_t;\n"
46931"# define __int_least16_t int16_t\n"
46932"# define __uint_least16_t uint16_t\n"
46933"# define __int_least8_t int16_t\n"
46934"# define __uint_least8_t uint16_t\n"
46935"#endif /* __INT16_TYPE__ */\n"
46936"\n"
46937"#ifdef __int_least16_t\n"
46938"typedef __int_least16_t int_least16_t;\n"
46939"typedef __uint_least16_t uint_least16_t;\n"
46940"typedef __int_least16_t int_fast16_t;\n"
46941"typedef __uint_least16_t uint_fast16_t;\n"
46942"#endif /* __int_least16_t */\n"
46943"\n"
46944"\n"
46945"#ifdef __INT8_TYPE__\n"
46946"#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/\n"
46947"typedef __INT8_TYPE__ int8_t;\n"
46948"#endif /* __int8_t_defined */\n"
46949"typedef __UINT8_TYPE__ uint8_t;\n"
46950"# define __int_least8_t int8_t\n"
46951"# define __uint_least8_t uint8_t\n"
46952"#endif /* __INT8_TYPE__ */\n"
46953"\n"
46954"#ifdef __int_least8_t\n"
46955"typedef __int_least8_t int_least8_t;\n"
46956"typedef __uint_least8_t uint_least8_t;\n"
46957"typedef __int_least8_t int_fast8_t;\n"
46958"typedef __uint_least8_t uint_fast8_t;\n"
46959"#endif /* __int_least8_t */\n"
46960"\n"
46961"/* prevent glibc sys/types.h from defining conflicting types */\n"
46962"#ifndef __int8_t_defined\n"
46963"# define __int8_t_defined\n"
46964"#endif /* __int8_t_defined */\n"
46965"\n"
46966"/* C99 7.18.1.4 Integer types capable of holding object pointers.\n"
46967" */\n"
46968"#define __stdint_join3(a,b,c) a ## b ## c\n"
46969"\n"
46970"#ifndef _INTPTR_T\n"
46971"#ifndef __intptr_t_defined\n"
46972"typedef __INTPTR_TYPE__ intptr_t;\n"
46973"#define __intptr_t_defined\n"
46974"#define _INTPTR_T\n"
46975"#endif\n"
46976"#endif\n"
46977"\n"
46978"#ifndef _UINTPTR_T\n"
46979"typedef __UINTPTR_TYPE__ uintptr_t;\n"
46980"#define _UINTPTR_T\n"
46981"#endif\n"
46982"\n"
46983"/* C99 7.18.1.5 Greatest-width integer types.\n"
46984" */\n"
46985"typedef __INTMAX_TYPE__ intmax_t;\n"
46986"typedef __UINTMAX_TYPE__ uintmax_t;\n"
46987"\n"
46988"/* C99 7.18.4 Macros for minimum-width integer constants.\n"
46989" *\n"
46990" * The standard requires that integer constant macros be defined for all the\n"
46991" * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width\n"
46992" * types are required, the corresponding integer constant macros are defined\n"
46993" * here. This implementation also defines minimum-width types for every other\n"
46994" * integer width that the target implements, so corresponding macros are\n"
46995" * defined below, too.\n"
46996" *\n"
46997" * These macros are defined using the same successive-shrinking approach as\n"
46998" * the type definitions above. It is likewise important that macros are defined\n"
46999" * in order of decending width.\n"
47000" *\n"
47001" * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the\n"
47002" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
47003" */\n"
47004"\n"
47005"#define __int_c_join(a, b) a ## b\n"
47006"#define __int_c(v, suffix) __int_c_join(v, suffix)\n"
47007"#define __uint_c(v, suffix) __int_c_join(v##U, suffix)\n"
47008"\n"
47009"\n"
47010"#ifdef __INT64_TYPE__\n"
47011"# ifdef __INT64_C_SUFFIX__\n"
47012"# define __int64_c_suffix __INT64_C_SUFFIX__\n"
47013"# define __int32_c_suffix __INT64_C_SUFFIX__\n"
47014"# define __int16_c_suffix __INT64_C_SUFFIX__\n"
47015"# define __int8_c_suffix __INT64_C_SUFFIX__\n"
47016"# else\n"
47017"# undef __int64_c_suffix\n"
47018"# undef __int32_c_suffix\n"
47019"# undef __int16_c_suffix\n"
47020"# undef __int8_c_suffix\n"
47021"# endif /* __INT64_C_SUFFIX__ */\n"
47022"#endif /* __INT64_TYPE__ */\n"
47023"\n"
47024"#ifdef __int_least64_t\n"
47025"# ifdef __int64_c_suffix\n"
47026"# define INT64_C(v) __int_c(v, __int64_c_suffix)\n"
47027"# define UINT64_C(v) __uint_c(v, __int64_c_suffix)\n"
47028"# else\n"
47029"# define INT64_C(v) v\n"
47030"# define UINT64_C(v) v ## U\n"
47031"# endif /* __int64_c_suffix */\n"
47032"#endif /* __int_least64_t */\n"
47033"\n"
47034"\n"
47035"#ifdef __INT56_TYPE__\n"
47036"# ifdef __INT56_C_SUFFIX__\n"
47037"# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__)\n"
47038"# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__)\n"
47039"# define __int32_c_suffix __INT56_C_SUFFIX__\n"
47040"# define __int16_c_suffix __INT56_C_SUFFIX__\n"
47041"# define __int8_c_suffix __INT56_C_SUFFIX__\n"
47042"# else\n"
47043"# define INT56_C(v) v\n"
47044"# define UINT56_C(v) v ## U\n"
47045"# undef __int32_c_suffix\n"
47046"# undef __int16_c_suffix\n"
47047"# undef __int8_c_suffix\n"
47048"# endif /* __INT56_C_SUFFIX__ */\n"
47049"#endif /* __INT56_TYPE__ */\n"
47050"\n"
47051"\n"
47052"#ifdef __INT48_TYPE__\n"
47053"# ifdef __INT48_C_SUFFIX__\n"
47054"# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__)\n"
47055"# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__)\n"
47056"# define __int32_c_suffix __INT48_C_SUFFIX__\n"
47057"# define __int16_c_suffix __INT48_C_SUFFIX__\n"
47058"# define __int8_c_suffix __INT48_C_SUFFIX__\n"
47059"# else\n"
47060"# define INT48_C(v) v\n"
47061"# define UINT48_C(v) v ## U\n"
47062"# undef __int32_c_suffix\n"
47063"# undef __int16_c_suffix\n"
47064"# undef __int8_c_suffix\n"
47065"# endif /* __INT48_C_SUFFIX__ */\n"
47066"#endif /* __INT48_TYPE__ */\n"
47067"\n"
47068"\n"
47069"#ifdef __INT40_TYPE__\n"
47070"# ifdef __INT40_C_SUFFIX__\n"
47071"# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__)\n"
47072"# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__)\n"
47073"# define __int32_c_suffix __INT40_C_SUFFIX__\n"
47074"# define __int16_c_suffix __INT40_C_SUFFIX__\n"
47075"# define __int8_c_suffix __INT40_C_SUFFIX__\n"
47076"# else\n"
47077"# define INT40_C(v) v\n"
47078"# define UINT40_C(v) v ## U\n"
47079"# undef __int32_c_suffix\n"
47080"# undef __int16_c_suffix\n"
47081"# undef __int8_c_suffix\n"
47082"# endif /* __INT40_C_SUFFIX__ */\n"
47083"#endif /* __INT40_TYPE__ */\n"
47084"\n"
47085"\n"
47086"#ifdef __INT32_TYPE__\n"
47087"# ifdef __INT32_C_SUFFIX__\n"
47088"# define __int32_c_suffix __INT32_C_SUFFIX__\n"
47089"# define __int16_c_suffix __INT32_C_SUFFIX__\n"
47090"# define __int8_c_suffix __INT32_C_SUFFIX__\n"
47091"#else\n"
47092"# undef __int32_c_suffix\n"
47093"# undef __int16_c_suffix\n"
47094"# undef __int8_c_suffix\n"
47095"# endif /* __INT32_C_SUFFIX__ */\n"
47096"#endif /* __INT32_TYPE__ */\n"
47097"\n"
47098"#ifdef __int_least32_t\n"
47099"# ifdef __int32_c_suffix\n"
47100"# define INT32_C(v) __int_c(v, __int32_c_suffix)\n"
47101"# define UINT32_C(v) __uint_c(v, __int32_c_suffix)\n"
47102"# else\n"
47103"# define INT32_C(v) v\n"
47104"# define UINT32_C(v) v ## U\n"
47105"# endif /* __int32_c_suffix */\n"
47106"#endif /* __int_least32_t */\n"
47107"\n"
47108"\n"
47109"#ifdef __INT24_TYPE__\n"
47110"# ifdef __INT24_C_SUFFIX__\n"
47111"# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__)\n"
47112"# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__)\n"
47113"# define __int16_c_suffix __INT24_C_SUFFIX__\n"
47114"# define __int8_c_suffix __INT24_C_SUFFIX__\n"
47115"# else\n"
47116"# define INT24_C(v) v\n"
47117"# define UINT24_C(v) v ## U\n"
47118"# undef __int16_c_suffix\n"
47119"# undef __int8_c_suffix\n"
47120"# endif /* __INT24_C_SUFFIX__ */\n"
47121"#endif /* __INT24_TYPE__ */\n"
47122"\n"
47123"\n"
47124"#ifdef __INT16_TYPE__\n"
47125"# ifdef __INT16_C_SUFFIX__\n"
47126"# define __int16_c_suffix __INT16_C_SUFFIX__\n"
47127"# define __int8_c_suffix __INT16_C_SUFFIX__\n"
47128"#else\n"
47129"# undef __int16_c_suffix\n"
47130"# undef __int8_c_suffix\n"
47131"# endif /* __INT16_C_SUFFIX__ */\n"
47132"#endif /* __INT16_TYPE__ */\n"
47133"\n"
47134"#ifdef __int_least16_t\n"
47135"# ifdef __int16_c_suffix\n"
47136"# define INT16_C(v) __int_c(v, __int16_c_suffix)\n"
47137"# define UINT16_C(v) __uint_c(v, __int16_c_suffix)\n"
47138"# else\n"
47139"# define INT16_C(v) v\n"
47140"# define UINT16_C(v) v ## U\n"
47141"# endif /* __int16_c_suffix */\n"
47142"#endif /* __int_least16_t */\n"
47143"\n"
47144"\n"
47145"#ifdef __INT8_TYPE__\n"
47146"# ifdef __INT8_C_SUFFIX__\n"
47147"# define __int8_c_suffix __INT8_C_SUFFIX__\n"
47148"#else\n"
47149"# undef __int8_c_suffix\n"
47150"# endif /* __INT8_C_SUFFIX__ */\n"
47151"#endif /* __INT8_TYPE__ */\n"
47152"\n"
47153"#ifdef __int_least8_t\n"
47154"# ifdef __int8_c_suffix\n"
47155"# define INT8_C(v) __int_c(v, __int8_c_suffix)\n"
47156"# define UINT8_C(v) __uint_c(v, __int8_c_suffix)\n"
47157"# else\n"
47158"# define INT8_C(v) v\n"
47159"# define UINT8_C(v) v ## U\n"
47160"# endif /* __int8_c_suffix */\n"
47161"#endif /* __int_least8_t */\n"
47162"\n"
47163"\n"
47164"/* C99 7.18.2.1 Limits of exact-width integer types.\n"
47165" * C99 7.18.2.2 Limits of minimum-width integer types.\n"
47166" * C99 7.18.2.3 Limits of fastest minimum-width integer types.\n"
47167" *\n"
47168" * The presence of limit macros are completely optional in C99. This\n"
47169" * implementation defines limits for all of the types (exact- and\n"
47170" * minimum-width) that it defines above, using the limits of the minimum-width\n"
47171" * type for any types that do not have exact-width representations.\n"
47172" *\n"
47173" * As in the type definitions, this section takes an approach of\n"
47174" * successive-shrinking to determine which limits to use for the standard (8,\n"
47175" * 16, 32, 64) bit widths when they don't have exact representations. It is\n"
47176" * therefore important that the definitions be kept in order of decending\n"
47177" * widths.\n"
47178" *\n"
47179" * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the\n"
47180" * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n"
47181" */\n"
47182"\n"
47183"#ifdef __INT64_TYPE__\n"
47184"# define INT64_MAX INT64_C( 9223372036854775807)\n"
47185"# define INT64_MIN (-INT64_C( 9223372036854775807)-1)\n"
47186"# define UINT64_MAX UINT64_C(18446744073709551615)\n"
47187"# define __INT_LEAST64_MIN INT64_MIN\n"
47188"# define __INT_LEAST64_MAX INT64_MAX\n"
47189"# define __UINT_LEAST64_MAX UINT64_MAX\n"
47190"# define __INT_LEAST32_MIN INT64_MIN\n"
47191"# define __INT_LEAST32_MAX INT64_MAX\n"
47192"# define __UINT_LEAST32_MAX UINT64_MAX\n"
47193"# define __INT_LEAST16_MIN INT64_MIN\n"
47194"# define __INT_LEAST16_MAX INT64_MAX\n"
47195"# define __UINT_LEAST16_MAX UINT64_MAX\n"
47196"# define __INT_LEAST8_MIN INT64_MIN\n"
47197"# define __INT_LEAST8_MAX INT64_MAX\n"
47198"# define __UINT_LEAST8_MAX UINT64_MAX\n"
47199"#endif /* __INT64_TYPE__ */\n"
47200"\n"
47201"#ifdef __INT_LEAST64_MIN\n"
47202"# define INT_LEAST64_MIN __INT_LEAST64_MIN\n"
47203"# define INT_LEAST64_MAX __INT_LEAST64_MAX\n"
47204"# define UINT_LEAST64_MAX __UINT_LEAST64_MAX\n"
47205"# define INT_FAST64_MIN __INT_LEAST64_MIN\n"
47206"# define INT_FAST64_MAX __INT_LEAST64_MAX\n"
47207"# define UINT_FAST64_MAX __UINT_LEAST64_MAX\n"
47208"#endif /* __INT_LEAST64_MIN */\n"
47209"\n"
47210"\n"
47211"#ifdef __INT56_TYPE__\n"
47212"# define INT56_MAX INT56_C(36028797018963967)\n"
47213"# define INT56_MIN (-INT56_C(36028797018963967)-1)\n"
47214"# define UINT56_MAX UINT56_C(72057594037927935)\n"
47215"# define INT_LEAST56_MIN INT56_MIN\n"
47216"# define INT_LEAST56_MAX INT56_MAX\n"
47217"# define UINT_LEAST56_MAX UINT56_MAX\n"
47218"# define INT_FAST56_MIN INT56_MIN\n"
47219"# define INT_FAST56_MAX INT56_MAX\n"
47220"# define UINT_FAST56_MAX UINT56_MAX\n"
47221"# define __INT_LEAST32_MIN INT56_MIN\n"
47222"# define __INT_LEAST32_MAX INT56_MAX\n"
47223"# define __UINT_LEAST32_MAX UINT56_MAX\n"
47224"# define __INT_LEAST16_MIN INT56_MIN\n"
47225"# define __INT_LEAST16_MAX INT56_MAX\n"
47226"# define __UINT_LEAST16_MAX UINT56_MAX\n"
47227"# define __INT_LEAST8_MIN INT56_MIN\n"
47228"# define __INT_LEAST8_MAX INT56_MAX\n"
47229"# define __UINT_LEAST8_MAX UINT56_MAX\n"
47230"#endif /* __INT56_TYPE__ */\n"
47231"\n"
47232"\n"
47233"#ifdef __INT48_TYPE__\n"
47234"# define INT48_MAX INT48_C(140737488355327)\n"
47235"# define INT48_MIN (-INT48_C(140737488355327)-1)\n"
47236"# define UINT48_MAX UINT48_C(281474976710655)\n"
47237"# define INT_LEAST48_MIN INT48_MIN\n"
47238"# define INT_LEAST48_MAX INT48_MAX\n"
47239"# define UINT_LEAST48_MAX UINT48_MAX\n"
47240"# define INT_FAST48_MIN INT48_MIN\n"
47241"# define INT_FAST48_MAX INT48_MAX\n"
47242"# define UINT_FAST48_MAX UINT48_MAX\n"
47243"# define __INT_LEAST32_MIN INT48_MIN\n"
47244"# define __INT_LEAST32_MAX INT48_MAX\n"
47245"# define __UINT_LEAST32_MAX UINT48_MAX\n"
47246"# define __INT_LEAST16_MIN INT48_MIN\n"
47247"# define __INT_LEAST16_MAX INT48_MAX\n"
47248"# define __UINT_LEAST16_MAX UINT48_MAX\n"
47249"# define __INT_LEAST8_MIN INT48_MIN\n"
47250"# define __INT_LEAST8_MAX INT48_MAX\n"
47251"# define __UINT_LEAST8_MAX UINT48_MAX\n"
47252"#endif /* __INT48_TYPE__ */\n"
47253"\n"
47254"\n"
47255"#ifdef __INT40_TYPE__\n"
47256"# define INT40_MAX INT40_C(549755813887)\n"
47257"# define INT40_MIN (-INT40_C(549755813887)-1)\n"
47258"# define UINT40_MAX UINT40_C(1099511627775)\n"
47259"# define INT_LEAST40_MIN INT40_MIN\n"
47260"# define INT_LEAST40_MAX INT40_MAX\n"
47261"# define UINT_LEAST40_MAX UINT40_MAX\n"
47262"# define INT_FAST40_MIN INT40_MIN\n"
47263"# define INT_FAST40_MAX INT40_MAX\n"
47264"# define UINT_FAST40_MAX UINT40_MAX\n"
47265"# define __INT_LEAST32_MIN INT40_MIN\n"
47266"# define __INT_LEAST32_MAX INT40_MAX\n"
47267"# define __UINT_LEAST32_MAX UINT40_MAX\n"
47268"# define __INT_LEAST16_MIN INT40_MIN\n"
47269"# define __INT_LEAST16_MAX INT40_MAX\n"
47270"# define __UINT_LEAST16_MAX UINT40_MAX\n"
47271"# define __INT_LEAST8_MIN INT40_MIN\n"
47272"# define __INT_LEAST8_MAX INT40_MAX\n"
47273"# define __UINT_LEAST8_MAX UINT40_MAX\n"
47274"#endif /* __INT40_TYPE__ */\n"
47275"\n"
47276"\n"
47277"#ifdef __INT32_TYPE__\n"
47278"# define INT32_MAX INT32_C(2147483647)\n"
47279"# define INT32_MIN (-INT32_C(2147483647)-1)\n"
47280"# define UINT32_MAX UINT32_C(4294967295)\n"
47281"# define __INT_LEAST32_MIN INT32_MIN\n"
47282"# define __INT_LEAST32_MAX INT32_MAX\n"
47283"# define __UINT_LEAST32_MAX UINT32_MAX\n"
47284"# define __INT_LEAST16_MIN INT32_MIN\n"
47285"# define __INT_LEAST16_MAX INT32_MAX\n"
47286"# define __UINT_LEAST16_MAX UINT32_MAX\n"
47287"# define __INT_LEAST8_MIN INT32_MIN\n"
47288"# define __INT_LEAST8_MAX INT32_MAX\n"
47289"# define __UINT_LEAST8_MAX UINT32_MAX\n"
47290"#endif /* __INT32_TYPE__ */\n"
47291"\n"
47292"#ifdef __INT_LEAST32_MIN\n"
47293"# define INT_LEAST32_MIN __INT_LEAST32_MIN\n"
47294"# define INT_LEAST32_MAX __INT_LEAST32_MAX\n"
47295"# define UINT_LEAST32_MAX __UINT_LEAST32_MAX\n"
47296"# define INT_FAST32_MIN __INT_LEAST32_MIN\n"
47297"# define INT_FAST32_MAX __INT_LEAST32_MAX\n"
47298"# define UINT_FAST32_MAX __UINT_LEAST32_MAX\n"
47299"#endif /* __INT_LEAST32_MIN */\n"
47300"\n"
47301"\n"
47302"#ifdef __INT24_TYPE__\n"
47303"# define INT24_MAX INT24_C(8388607)\n"
47304"# define INT24_MIN (-INT24_C(8388607)-1)\n"
47305"# define UINT24_MAX UINT24_C(16777215)\n"
47306"# define INT_LEAST24_MIN INT24_MIN\n"
47307"# define INT_LEAST24_MAX INT24_MAX\n"
47308"# define UINT_LEAST24_MAX UINT24_MAX\n"
47309"# define INT_FAST24_MIN INT24_MIN\n"
47310"# define INT_FAST24_MAX INT24_MAX\n"
47311"# define UINT_FAST24_MAX UINT24_MAX\n"
47312"# define __INT_LEAST16_MIN INT24_MIN\n"
47313"# define __INT_LEAST16_MAX INT24_MAX\n"
47314"# define __UINT_LEAST16_MAX UINT24_MAX\n"
47315"# define __INT_LEAST8_MIN INT24_MIN\n"
47316"# define __INT_LEAST8_MAX INT24_MAX\n"
47317"# define __UINT_LEAST8_MAX UINT24_MAX\n"
47318"#endif /* __INT24_TYPE__ */\n"
47319"\n"
47320"\n"
47321"#ifdef __INT16_TYPE__\n"
47322"#define INT16_MAX INT16_C(32767)\n"
47323"#define INT16_MIN (-INT16_C(32767)-1)\n"
47324"#define UINT16_MAX UINT16_C(65535)\n"
47325"# define __INT_LEAST16_MIN INT16_MIN\n"
47326"# define __INT_LEAST16_MAX INT16_MAX\n"
47327"# define __UINT_LEAST16_MAX UINT16_MAX\n"
47328"# define __INT_LEAST8_MIN INT16_MIN\n"
47329"# define __INT_LEAST8_MAX INT16_MAX\n"
47330"# define __UINT_LEAST8_MAX UINT16_MAX\n"
47331"#endif /* __INT16_TYPE__ */\n"
47332"\n"
47333"#ifdef __INT_LEAST16_MIN\n"
47334"# define INT_LEAST16_MIN __INT_LEAST16_MIN\n"
47335"# define INT_LEAST16_MAX __INT_LEAST16_MAX\n"
47336"# define UINT_LEAST16_MAX __UINT_LEAST16_MAX\n"
47337"# define INT_FAST16_MIN __INT_LEAST16_MIN\n"
47338"# define INT_FAST16_MAX __INT_LEAST16_MAX\n"
47339"# define UINT_FAST16_MAX __UINT_LEAST16_MAX\n"
47340"#endif /* __INT_LEAST16_MIN */\n"
47341"\n"
47342"\n"
47343"#ifdef __INT8_TYPE__\n"
47344"# define INT8_MAX INT8_C(127)\n"
47345"# define INT8_MIN (-INT8_C(127)-1)\n"
47346"# define UINT8_MAX UINT8_C(255)\n"
47347"# define __INT_LEAST8_MIN INT8_MIN\n"
47348"# define __INT_LEAST8_MAX INT8_MAX\n"
47349"# define __UINT_LEAST8_MAX UINT8_MAX\n"
47350"#endif /* __INT8_TYPE__ */\n"
47351"\n"
47352"#ifdef __INT_LEAST8_MIN\n"
47353"# define INT_LEAST8_MIN __INT_LEAST8_MIN\n"
47354"# define INT_LEAST8_MAX __INT_LEAST8_MAX\n"
47355"# define UINT_LEAST8_MAX __UINT_LEAST8_MAX\n"
47356"# define INT_FAST8_MIN __INT_LEAST8_MIN\n"
47357"# define INT_FAST8_MAX __INT_LEAST8_MAX\n"
47358"# define UINT_FAST8_MAX __UINT_LEAST8_MAX\n"
47359"#endif /* __INT_LEAST8_MIN */\n"
47360"\n"
47361"/* Some utility macros */\n"
47362"#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)\n"
47363"#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)\n"
47364"#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)\n"
47365"#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))\n"
47366"#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))\n"
47367"\n"
47368"/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */\n"
47369"/* C99 7.18.3 Limits of other integer types. */\n"
47370"\n"
47371"#define INTPTR_MIN (-__INTPTR_MAX__-1)\n"
47372"#define INTPTR_MAX __INTPTR_MAX__\n"
47373"#define UINTPTR_MAX __UINTPTR_MAX__\n"
47374"#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)\n"
47375"#define PTRDIFF_MAX __PTRDIFF_MAX__\n"
47376"#define SIZE_MAX __SIZE_MAX__\n"
47377"\n"
47378"/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__\n"
47379" * is enabled. */\n"
47380"#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1\n"
47381"#define RSIZE_MAX (SIZE_MAX >> 1)\n"
47382"#endif\n"
47383"\n"
47384"/* C99 7.18.2.5 Limits of greatest-width integer types. */\n"
47385"#define INTMAX_MIN (-__INTMAX_MAX__-1)\n"
47386"#define INTMAX_MAX __INTMAX_MAX__\n"
47387"#define UINTMAX_MAX __UINTMAX_MAX__\n"
47388"\n"
47389"/* C99 7.18.3 Limits of other integer types. */\n"
47390"#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)\n"
47391"#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)\n"
47392"#ifdef __WINT_UNSIGNED__\n"
47393"# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)\n"
47394"# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)\n"
47395"#else\n"
47396"# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)\n"
47397"# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)\n"
47398"#endif\n"
47399"\n"
47400"#ifndef WCHAR_MAX\n"
47401"# define WCHAR_MAX __WCHAR_MAX__\n"
47402"#endif\n"
47403"#ifndef WCHAR_MIN\n"
47404"# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)\n"
47405"# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)\n"
47406"# else\n"
47407"# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)\n"
47408"# endif\n"
47409"#endif\n"
47410"\n"
47411"/* 7.18.4.2 Macros for greatest-width integer constants. */\n"
47412"#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)\n"
47413"#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)\n"
47414"\n"
47415"#endif /* __CLANG_STDINT_H2 */\n"
47416"#endif /* __STDC_HOSTED__ */\n"
47417"" } ,
47418 { "/builtins/stdnoreturn.h" , "/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===\n"
47419" *\n"
47420" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
47421" * of this software and associated documentation files (the \"Software\"), to deal\n"
47422" * in the Software without restriction, including without limitation the rights\n"
47423" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
47424" * copies of the Software, and to permit persons to whom the Software is\n"
47425" * furnished to do so, subject to the following conditions:\n"
47426" *\n"
47427" * The above copyright notice and this permission notice shall be included in\n"
47428" * all copies or substantial portions of the Software.\n"
47429" *\n"
47430" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
47431" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
47432" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
47433" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
47434" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
47435" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
47436" * THE SOFTWARE.\n"
47437" *\n"
47438" *===-----------------------------------------------------------------------===\n"
47439" */\n"
47440"\n"
47441"#ifndef __STDNORETURN_H\n"
47442"#define __STDNORETURN_H\n"
47443"\n"
47444"#define noreturn _Noreturn\n"
47445"#define __noreturn_is_defined 1\n"
47446"\n"
47447"#endif /* __STDNORETURN_H */\n"
47448"" } ,
47449 { "/builtins/tbmintrin.h" , "/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===\n"
47450" *\n"
47451" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
47452" * of this software and associated documentation files (the \"Software\"), to deal\n"
47453" * in the Software without restriction, including without limitation the rights\n"
47454" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
47455" * copies of the Software, and to permit persons to whom the Software is\n"
47456" * furnished to do so, subject to the following conditions:\n"
47457" *\n"
47458" * The above copyright notice and this permission notice shall be included in\n"
47459" * all copies or substantial portions of the Software.\n"
47460" *\n"
47461" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
47462" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
47463" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
47464" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
47465" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
47466" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
47467" * THE SOFTWARE.\n"
47468" *\n"
47469" *===-----------------------------------------------------------------------===\n"
47470" */\n"
47471"\n"
47472"#ifndef __X86INTRIN_H\n"
47473"#error \"Never use <tbmintrin.h> directly; include <x86intrin.h> instead.\"\n"
47474"#endif\n"
47475"\n"
47476"#ifndef __TBMINTRIN_H\n"
47477"#define __TBMINTRIN_H\n"
47478"\n"
47479"/* Define the default attributes for the functions in this file. */\n"
47480"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"tbm\")))\n"
47481"\n"
47482"#define __bextri_u32(a, b) \\\n"
47483" ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \\\n"
47484" (unsigned int)(b)))\n"
47485"\n"
47486"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47487"__blcfill_u32(unsigned int __a)\n"
47488"{\n"
47489" return __a & (__a + 1);\n"
47490"}\n"
47491"\n"
47492"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47493"__blci_u32(unsigned int __a)\n"
47494"{\n"
47495" return __a | ~(__a + 1);\n"
47496"}\n"
47497"\n"
47498"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47499"__blcic_u32(unsigned int __a)\n"
47500"{\n"
47501" return ~__a & (__a + 1);\n"
47502"}\n"
47503"\n"
47504"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47505"__blcmsk_u32(unsigned int __a)\n"
47506"{\n"
47507" return __a ^ (__a + 1);\n"
47508"}\n"
47509"\n"
47510"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47511"__blcs_u32(unsigned int __a)\n"
47512"{\n"
47513" return __a | (__a + 1);\n"
47514"}\n"
47515"\n"
47516"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47517"__blsfill_u32(unsigned int __a)\n"
47518"{\n"
47519" return __a | (__a - 1);\n"
47520"}\n"
47521"\n"
47522"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47523"__blsic_u32(unsigned int __a)\n"
47524"{\n"
47525" return ~__a | (__a - 1);\n"
47526"}\n"
47527"\n"
47528"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47529"__t1mskc_u32(unsigned int __a)\n"
47530"{\n"
47531" return ~__a | (__a + 1);\n"
47532"}\n"
47533"\n"
47534"static __inline__ unsigned int __DEFAULT_FN_ATTRS\n"
47535"__tzmsk_u32(unsigned int __a)\n"
47536"{\n"
47537" return ~__a & (__a - 1);\n"
47538"}\n"
47539"\n"
47540"#ifdef __x86_64__\n"
47541"#define __bextri_u64(a, b) \\\n"
47542" ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \\\n"
47543" (unsigned long long)(b)))\n"
47544"\n"
47545"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47546"__blcfill_u64(unsigned long long __a)\n"
47547"{\n"
47548" return __a & (__a + 1);\n"
47549"}\n"
47550"\n"
47551"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47552"__blci_u64(unsigned long long __a)\n"
47553"{\n"
47554" return __a | ~(__a + 1);\n"
47555"}\n"
47556"\n"
47557"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47558"__blcic_u64(unsigned long long __a)\n"
47559"{\n"
47560" return ~__a & (__a + 1);\n"
47561"}\n"
47562"\n"
47563"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47564"__blcmsk_u64(unsigned long long __a)\n"
47565"{\n"
47566" return __a ^ (__a + 1);\n"
47567"}\n"
47568"\n"
47569"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47570"__blcs_u64(unsigned long long __a)\n"
47571"{\n"
47572" return __a | (__a + 1);\n"
47573"}\n"
47574"\n"
47575"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47576"__blsfill_u64(unsigned long long __a)\n"
47577"{\n"
47578" return __a | (__a - 1);\n"
47579"}\n"
47580"\n"
47581"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47582"__blsic_u64(unsigned long long __a)\n"
47583"{\n"
47584" return ~__a | (__a - 1);\n"
47585"}\n"
47586"\n"
47587"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47588"__t1mskc_u64(unsigned long long __a)\n"
47589"{\n"
47590" return ~__a | (__a + 1);\n"
47591"}\n"
47592"\n"
47593"static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n"
47594"__tzmsk_u64(unsigned long long __a)\n"
47595"{\n"
47596" return ~__a & (__a - 1);\n"
47597"}\n"
47598"#endif\n"
47599"\n"
47600"#undef __DEFAULT_FN_ATTRS\n"
47601"\n"
47602"#endif /* __TBMINTRIN_H */\n"
47603"" } ,
47604 { "/builtins/tgmath.h" , "/*===---- tgmath.h - Standard header for type generic math ----------------===*\\\n"
47605" *\n"
47606" * Copyright (c) 2009 Howard Hinnant\n"
47607" *\n"
47608" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
47609" * of this software and associated documentation files (the \"Software\"), to deal\n"
47610" * in the Software without restriction, including without limitation the rights\n"
47611" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
47612" * copies of the Software, and to permit persons to whom the Software is\n"
47613" * furnished to do so, subject to the following conditions:\n"
47614" *\n"
47615" * The above copyright notice and this permission notice shall be included in\n"
47616" * all copies or substantial portions of the Software.\n"
47617" *\n"
47618" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
47619" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
47620" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
47621" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
47622" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
47623" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
47624" * THE SOFTWARE.\n"
47625" *\n"
47626"\\*===----------------------------------------------------------------------===*/\n"
47627"\n"
47628"#ifndef __CLANG_TGMATH_H\n"
47629"#define __CLANG_TGMATH_H\n"
47630"\n"
47631"/* C99 7.22 Type-generic math <tgmath.h>. */\n"
47632"#include <math.h>\n"
47633"\n"
47634"/*\n"
47635" * Allow additional definitions and implementation-defined values on Apple\n"
47636" * platforms. This is done after #include <math.h> to avoid depcycle conflicts\n"
47637" * between libcxx and darwin in C++ modules builds.\n"
47638" */\n"
47639"#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)\n"
47640"# include_next <tgmath.h>\n"
47641"#else\n"
47642"\n"
47643"/* C++ handles type genericity with overloading in math.h. */\n"
47644"#ifndef __cplusplus\n"
47645"#include <complex.h>\n"
47646"\n"
47647"#define _TG_ATTRSp __attribute__((__overloadable__))\n"
47648"#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))\n"
47649"\n"
47650"// promotion\n"
47651"\n"
47652"typedef void _Argument_type_is_not_arithmetic;\n"
47653"static _Argument_type_is_not_arithmetic __tg_promote(...)\n"
47654" __attribute__((__unavailable__,__overloadable__));\n"
47655"static double _TG_ATTRSp __tg_promote(int);\n"
47656"static double _TG_ATTRSp __tg_promote(unsigned int);\n"
47657"static double _TG_ATTRSp __tg_promote(long);\n"
47658"static double _TG_ATTRSp __tg_promote(unsigned long);\n"
47659"static double _TG_ATTRSp __tg_promote(long long);\n"
47660"static double _TG_ATTRSp __tg_promote(unsigned long long);\n"
47661"static float _TG_ATTRSp __tg_promote(float);\n"
47662"static double _TG_ATTRSp __tg_promote(double);\n"
47663"static long double _TG_ATTRSp __tg_promote(long double);\n"
47664"static float _Complex _TG_ATTRSp __tg_promote(float _Complex);\n"
47665"static double _Complex _TG_ATTRSp __tg_promote(double _Complex);\n"
47666"static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);\n"
47667"\n"
47668"#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))\n"
47669"#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \\\n"
47670" __tg_promote(__y)))\n"
47671"#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \\\n"
47672" __tg_promote(__y) + \\\n"
47673" __tg_promote(__z)))\n"
47674"\n"
47675"// acos\n"
47676"\n"
47677"static float\n"
47678" _TG_ATTRS\n"
47679" __tg_acos(float __x) {return acosf(__x);}\n"
47680"\n"
47681"static double\n"
47682" _TG_ATTRS\n"
47683" __tg_acos(double __x) {return acos(__x);}\n"
47684"\n"
47685"static long double\n"
47686" _TG_ATTRS\n"
47687" __tg_acos(long double __x) {return acosl(__x);}\n"
47688"\n"
47689"static float _Complex\n"
47690" _TG_ATTRS\n"
47691" __tg_acos(float _Complex __x) {return cacosf(__x);}\n"
47692"\n"
47693"static double _Complex\n"
47694" _TG_ATTRS\n"
47695" __tg_acos(double _Complex __x) {return cacos(__x);}\n"
47696"\n"
47697"static long double _Complex\n"
47698" _TG_ATTRS\n"
47699" __tg_acos(long double _Complex __x) {return cacosl(__x);}\n"
47700"\n"
47701"#undef acos\n"
47702"#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))\n"
47703"\n"
47704"// asin\n"
47705"\n"
47706"static float\n"
47707" _TG_ATTRS\n"
47708" __tg_asin(float __x) {return asinf(__x);}\n"
47709"\n"
47710"static double\n"
47711" _TG_ATTRS\n"
47712" __tg_asin(double __x) {return asin(__x);}\n"
47713"\n"
47714"static long double\n"
47715" _TG_ATTRS\n"
47716" __tg_asin(long double __x) {return asinl(__x);}\n"
47717"\n"
47718"static float _Complex\n"
47719" _TG_ATTRS\n"
47720" __tg_asin(float _Complex __x) {return casinf(__x);}\n"
47721"\n"
47722"static double _Complex\n"
47723" _TG_ATTRS\n"
47724" __tg_asin(double _Complex __x) {return casin(__x);}\n"
47725"\n"
47726"static long double _Complex\n"
47727" _TG_ATTRS\n"
47728" __tg_asin(long double _Complex __x) {return casinl(__x);}\n"
47729"\n"
47730"#undef asin\n"
47731"#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))\n"
47732"\n"
47733"// atan\n"
47734"\n"
47735"static float\n"
47736" _TG_ATTRS\n"
47737" __tg_atan(float __x) {return atanf(__x);}\n"
47738"\n"
47739"static double\n"
47740" _TG_ATTRS\n"
47741" __tg_atan(double __x) {return atan(__x);}\n"
47742"\n"
47743"static long double\n"
47744" _TG_ATTRS\n"
47745" __tg_atan(long double __x) {return atanl(__x);}\n"
47746"\n"
47747"static float _Complex\n"
47748" _TG_ATTRS\n"
47749" __tg_atan(float _Complex __x) {return catanf(__x);}\n"
47750"\n"
47751"static double _Complex\n"
47752" _TG_ATTRS\n"
47753" __tg_atan(double _Complex __x) {return catan(__x);}\n"
47754"\n"
47755"static long double _Complex\n"
47756" _TG_ATTRS\n"
47757" __tg_atan(long double _Complex __x) {return catanl(__x);}\n"
47758"\n"
47759"#undef atan\n"
47760"#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))\n"
47761"\n"
47762"// acosh\n"
47763"\n"
47764"static float\n"
47765" _TG_ATTRS\n"
47766" __tg_acosh(float __x) {return acoshf(__x);}\n"
47767"\n"
47768"static double\n"
47769" _TG_ATTRS\n"
47770" __tg_acosh(double __x) {return acosh(__x);}\n"
47771"\n"
47772"static long double\n"
47773" _TG_ATTRS\n"
47774" __tg_acosh(long double __x) {return acoshl(__x);}\n"
47775"\n"
47776"static float _Complex\n"
47777" _TG_ATTRS\n"
47778" __tg_acosh(float _Complex __x) {return cacoshf(__x);}\n"
47779"\n"
47780"static double _Complex\n"
47781" _TG_ATTRS\n"
47782" __tg_acosh(double _Complex __x) {return cacosh(__x);}\n"
47783"\n"
47784"static long double _Complex\n"
47785" _TG_ATTRS\n"
47786" __tg_acosh(long double _Complex __x) {return cacoshl(__x);}\n"
47787"\n"
47788"#undef acosh\n"
47789"#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x))\n"
47790"\n"
47791"// asinh\n"
47792"\n"
47793"static float\n"
47794" _TG_ATTRS\n"
47795" __tg_asinh(float __x) {return asinhf(__x);}\n"
47796"\n"
47797"static double\n"
47798" _TG_ATTRS\n"
47799" __tg_asinh(double __x) {return asinh(__x);}\n"
47800"\n"
47801"static long double\n"
47802" _TG_ATTRS\n"
47803" __tg_asinh(long double __x) {return asinhl(__x);}\n"
47804"\n"
47805"static float _Complex\n"
47806" _TG_ATTRS\n"
47807" __tg_asinh(float _Complex __x) {return casinhf(__x);}\n"
47808"\n"
47809"static double _Complex\n"
47810" _TG_ATTRS\n"
47811" __tg_asinh(double _Complex __x) {return casinh(__x);}\n"
47812"\n"
47813"static long double _Complex\n"
47814" _TG_ATTRS\n"
47815" __tg_asinh(long double _Complex __x) {return casinhl(__x);}\n"
47816"\n"
47817"#undef asinh\n"
47818"#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x))\n"
47819"\n"
47820"// atanh\n"
47821"\n"
47822"static float\n"
47823" _TG_ATTRS\n"
47824" __tg_atanh(float __x) {return atanhf(__x);}\n"
47825"\n"
47826"static double\n"
47827" _TG_ATTRS\n"
47828" __tg_atanh(double __x) {return atanh(__x);}\n"
47829"\n"
47830"static long double\n"
47831" _TG_ATTRS\n"
47832" __tg_atanh(long double __x) {return atanhl(__x);}\n"
47833"\n"
47834"static float _Complex\n"
47835" _TG_ATTRS\n"
47836" __tg_atanh(float _Complex __x) {return catanhf(__x);}\n"
47837"\n"
47838"static double _Complex\n"
47839" _TG_ATTRS\n"
47840" __tg_atanh(double _Complex __x) {return catanh(__x);}\n"
47841"\n"
47842"static long double _Complex\n"
47843" _TG_ATTRS\n"
47844" __tg_atanh(long double _Complex __x) {return catanhl(__x);}\n"
47845"\n"
47846"#undef atanh\n"
47847"#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x))\n"
47848"\n"
47849"// cos\n"
47850"\n"
47851"static float\n"
47852" _TG_ATTRS\n"
47853" __tg_cos(float __x) {return cosf(__x);}\n"
47854"\n"
47855"static double\n"
47856" _TG_ATTRS\n"
47857" __tg_cos(double __x) {return cos(__x);}\n"
47858"\n"
47859"static long double\n"
47860" _TG_ATTRS\n"
47861" __tg_cos(long double __x) {return cosl(__x);}\n"
47862"\n"
47863"static float _Complex\n"
47864" _TG_ATTRS\n"
47865" __tg_cos(float _Complex __x) {return ccosf(__x);}\n"
47866"\n"
47867"static double _Complex\n"
47868" _TG_ATTRS\n"
47869" __tg_cos(double _Complex __x) {return ccos(__x);}\n"
47870"\n"
47871"static long double _Complex\n"
47872" _TG_ATTRS\n"
47873" __tg_cos(long double _Complex __x) {return ccosl(__x);}\n"
47874"\n"
47875"#undef cos\n"
47876"#define cos(__x) __tg_cos(__tg_promote1((__x))(__x))\n"
47877"\n"
47878"// sin\n"
47879"\n"
47880"static float\n"
47881" _TG_ATTRS\n"
47882" __tg_sin(float __x) {return sinf(__x);}\n"
47883"\n"
47884"static double\n"
47885" _TG_ATTRS\n"
47886" __tg_sin(double __x) {return sin(__x);}\n"
47887"\n"
47888"static long double\n"
47889" _TG_ATTRS\n"
47890" __tg_sin(long double __x) {return sinl(__x);}\n"
47891"\n"
47892"static float _Complex\n"
47893" _TG_ATTRS\n"
47894" __tg_sin(float _Complex __x) {return csinf(__x);}\n"
47895"\n"
47896"static double _Complex\n"
47897" _TG_ATTRS\n"
47898" __tg_sin(double _Complex __x) {return csin(__x);}\n"
47899"\n"
47900"static long double _Complex\n"
47901" _TG_ATTRS\n"
47902" __tg_sin(long double _Complex __x) {return csinl(__x);}\n"
47903"\n"
47904"#undef sin\n"
47905"#define sin(__x) __tg_sin(__tg_promote1((__x))(__x))\n"
47906"\n"
47907"// tan\n"
47908"\n"
47909"static float\n"
47910" _TG_ATTRS\n"
47911" __tg_tan(float __x) {return tanf(__x);}\n"
47912"\n"
47913"static double\n"
47914" _TG_ATTRS\n"
47915" __tg_tan(double __x) {return tan(__x);}\n"
47916"\n"
47917"static long double\n"
47918" _TG_ATTRS\n"
47919" __tg_tan(long double __x) {return tanl(__x);}\n"
47920"\n"
47921"static float _Complex\n"
47922" _TG_ATTRS\n"
47923" __tg_tan(float _Complex __x) {return ctanf(__x);}\n"
47924"\n"
47925"static double _Complex\n"
47926" _TG_ATTRS\n"
47927" __tg_tan(double _Complex __x) {return ctan(__x);}\n"
47928"\n"
47929"static long double _Complex\n"
47930" _TG_ATTRS\n"
47931" __tg_tan(long double _Complex __x) {return ctanl(__x);}\n"
47932"\n"
47933"#undef tan\n"
47934"#define tan(__x) __tg_tan(__tg_promote1((__x))(__x))\n"
47935"\n"
47936"// cosh\n"
47937"\n"
47938"static float\n"
47939" _TG_ATTRS\n"
47940" __tg_cosh(float __x) {return coshf(__x);}\n"
47941"\n"
47942"static double\n"
47943" _TG_ATTRS\n"
47944" __tg_cosh(double __x) {return cosh(__x);}\n"
47945"\n"
47946"static long double\n"
47947" _TG_ATTRS\n"
47948" __tg_cosh(long double __x) {return coshl(__x);}\n"
47949"\n"
47950"static float _Complex\n"
47951" _TG_ATTRS\n"
47952" __tg_cosh(float _Complex __x) {return ccoshf(__x);}\n"
47953"\n"
47954"static double _Complex\n"
47955" _TG_ATTRS\n"
47956" __tg_cosh(double _Complex __x) {return ccosh(__x);}\n"
47957"\n"
47958"static long double _Complex\n"
47959" _TG_ATTRS\n"
47960" __tg_cosh(long double _Complex __x) {return ccoshl(__x);}\n"
47961"\n"
47962"#undef cosh\n"
47963"#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x))\n"
47964"\n"
47965"// sinh\n"
47966"\n"
47967"static float\n"
47968" _TG_ATTRS\n"
47969" __tg_sinh(float __x) {return sinhf(__x);}\n"
47970"\n"
47971"static double\n"
47972" _TG_ATTRS\n"
47973" __tg_sinh(double __x) {return sinh(__x);}\n"
47974"\n"
47975"static long double\n"
47976" _TG_ATTRS\n"
47977" __tg_sinh(long double __x) {return sinhl(__x);}\n"
47978"\n"
47979"static float _Complex\n"
47980" _TG_ATTRS\n"
47981" __tg_sinh(float _Complex __x) {return csinhf(__x);}\n"
47982"\n"
47983"static double _Complex\n"
47984" _TG_ATTRS\n"
47985" __tg_sinh(double _Complex __x) {return csinh(__x);}\n"
47986"\n"
47987"static long double _Complex\n"
47988" _TG_ATTRS\n"
47989" __tg_sinh(long double _Complex __x) {return csinhl(__x);}\n"
47990"\n"
47991"#undef sinh\n"
47992"#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x))\n"
47993"\n"
47994"// tanh\n"
47995"\n"
47996"static float\n"
47997" _TG_ATTRS\n"
47998" __tg_tanh(float __x) {return tanhf(__x);}\n"
47999"\n"
48000"static double\n"
48001" _TG_ATTRS\n"
48002" __tg_tanh(double __x) {return tanh(__x);}\n"
48003"\n"
48004"static long double\n"
48005" _TG_ATTRS\n"
48006" __tg_tanh(long double __x) {return tanhl(__x);}\n"
48007"\n"
48008"static float _Complex\n"
48009" _TG_ATTRS\n"
48010" __tg_tanh(float _Complex __x) {return ctanhf(__x);}\n"
48011"\n"
48012"static double _Complex\n"
48013" _TG_ATTRS\n"
48014" __tg_tanh(double _Complex __x) {return ctanh(__x);}\n"
48015"\n"
48016"static long double _Complex\n"
48017" _TG_ATTRS\n"
48018" __tg_tanh(long double _Complex __x) {return ctanhl(__x);}\n"
48019"\n"
48020"#undef tanh\n"
48021"#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x))\n"
48022"\n"
48023"// exp\n"
48024"\n"
48025"static float\n"
48026" _TG_ATTRS\n"
48027" __tg_exp(float __x) {return expf(__x);}\n"
48028"\n"
48029"static double\n"
48030" _TG_ATTRS\n"
48031" __tg_exp(double __x) {return exp(__x);}\n"
48032"\n"
48033"static long double\n"
48034" _TG_ATTRS\n"
48035" __tg_exp(long double __x) {return expl(__x);}\n"
48036"\n"
48037"static float _Complex\n"
48038" _TG_ATTRS\n"
48039" __tg_exp(float _Complex __x) {return cexpf(__x);}\n"
48040"\n"
48041"static double _Complex\n"
48042" _TG_ATTRS\n"
48043" __tg_exp(double _Complex __x) {return cexp(__x);}\n"
48044"\n"
48045"static long double _Complex\n"
48046" _TG_ATTRS\n"
48047" __tg_exp(long double _Complex __x) {return cexpl(__x);}\n"
48048"\n"
48049"#undef exp\n"
48050"#define exp(__x) __tg_exp(__tg_promote1((__x))(__x))\n"
48051"\n"
48052"// log\n"
48053"\n"
48054"static float\n"
48055" _TG_ATTRS\n"
48056" __tg_log(float __x) {return logf(__x);}\n"
48057"\n"
48058"static double\n"
48059" _TG_ATTRS\n"
48060" __tg_log(double __x) {return log(__x);}\n"
48061"\n"
48062"static long double\n"
48063" _TG_ATTRS\n"
48064" __tg_log(long double __x) {return logl(__x);}\n"
48065"\n"
48066"static float _Complex\n"
48067" _TG_ATTRS\n"
48068" __tg_log(float _Complex __x) {return clogf(__x);}\n"
48069"\n"
48070"static double _Complex\n"
48071" _TG_ATTRS\n"
48072" __tg_log(double _Complex __x) {return clog(__x);}\n"
48073"\n"
48074"static long double _Complex\n"
48075" _TG_ATTRS\n"
48076" __tg_log(long double _Complex __x) {return clogl(__x);}\n"
48077"\n"
48078"#undef log\n"
48079"#define log(__x) __tg_log(__tg_promote1((__x))(__x))\n"
48080"\n"
48081"// pow\n"
48082"\n"
48083"static float\n"
48084" _TG_ATTRS\n"
48085" __tg_pow(float __x, float __y) {return powf(__x, __y);}\n"
48086"\n"
48087"static double\n"
48088" _TG_ATTRS\n"
48089" __tg_pow(double __x, double __y) {return pow(__x, __y);}\n"
48090"\n"
48091"static long double\n"
48092" _TG_ATTRS\n"
48093" __tg_pow(long double __x, long double __y) {return powl(__x, __y);}\n"
48094"\n"
48095"static float _Complex\n"
48096" _TG_ATTRS\n"
48097" __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);}\n"
48098"\n"
48099"static double _Complex\n"
48100" _TG_ATTRS\n"
48101" __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);}\n"
48102"\n"
48103"static long double _Complex\n"
48104" _TG_ATTRS\n"
48105" __tg_pow(long double _Complex __x, long double _Complex __y)\n"
48106" {return cpowl(__x, __y);}\n"
48107"\n"
48108"#undef pow\n"
48109"#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \\\n"
48110" __tg_promote2((__x), (__y))(__y))\n"
48111"\n"
48112"// sqrt\n"
48113"\n"
48114"static float\n"
48115" _TG_ATTRS\n"
48116" __tg_sqrt(float __x) {return sqrtf(__x);}\n"
48117"\n"
48118"static double\n"
48119" _TG_ATTRS\n"
48120" __tg_sqrt(double __x) {return sqrt(__x);}\n"
48121"\n"
48122"static long double\n"
48123" _TG_ATTRS\n"
48124" __tg_sqrt(long double __x) {return sqrtl(__x);}\n"
48125"\n"
48126"static float _Complex\n"
48127" _TG_ATTRS\n"
48128" __tg_sqrt(float _Complex __x) {return csqrtf(__x);}\n"
48129"\n"
48130"static double _Complex\n"
48131" _TG_ATTRS\n"
48132" __tg_sqrt(double _Complex __x) {return csqrt(__x);}\n"
48133"\n"
48134"static long double _Complex\n"
48135" _TG_ATTRS\n"
48136" __tg_sqrt(long double _Complex __x) {return csqrtl(__x);}\n"
48137"\n"
48138"#undef sqrt\n"
48139"#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x))\n"
48140"\n"
48141"// fabs\n"
48142"\n"
48143"static float\n"
48144" _TG_ATTRS\n"
48145" __tg_fabs(float __x) {return fabsf(__x);}\n"
48146"\n"
48147"static double\n"
48148" _TG_ATTRS\n"
48149" __tg_fabs(double __x) {return fabs(__x);}\n"
48150"\n"
48151"static long double\n"
48152" _TG_ATTRS\n"
48153" __tg_fabs(long double __x) {return fabsl(__x);}\n"
48154"\n"
48155"static float\n"
48156" _TG_ATTRS\n"
48157" __tg_fabs(float _Complex __x) {return cabsf(__x);}\n"
48158"\n"
48159"static double\n"
48160" _TG_ATTRS\n"
48161" __tg_fabs(double _Complex __x) {return cabs(__x);}\n"
48162"\n"
48163"static long double\n"
48164" _TG_ATTRS\n"
48165" __tg_fabs(long double _Complex __x) {return cabsl(__x);}\n"
48166"\n"
48167"#undef fabs\n"
48168"#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x))\n"
48169"\n"
48170"// atan2\n"
48171"\n"
48172"static float\n"
48173" _TG_ATTRS\n"
48174" __tg_atan2(float __x, float __y) {return atan2f(__x, __y);}\n"
48175"\n"
48176"static double\n"
48177" _TG_ATTRS\n"
48178" __tg_atan2(double __x, double __y) {return atan2(__x, __y);}\n"
48179"\n"
48180"static long double\n"
48181" _TG_ATTRS\n"
48182" __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);}\n"
48183"\n"
48184"#undef atan2\n"
48185"#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \\\n"
48186" __tg_promote2((__x), (__y))(__y))\n"
48187"\n"
48188"// cbrt\n"
48189"\n"
48190"static float\n"
48191" _TG_ATTRS\n"
48192" __tg_cbrt(float __x) {return cbrtf(__x);}\n"
48193"\n"
48194"static double\n"
48195" _TG_ATTRS\n"
48196" __tg_cbrt(double __x) {return cbrt(__x);}\n"
48197"\n"
48198"static long double\n"
48199" _TG_ATTRS\n"
48200" __tg_cbrt(long double __x) {return cbrtl(__x);}\n"
48201"\n"
48202"#undef cbrt\n"
48203"#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x))\n"
48204"\n"
48205"// ceil\n"
48206"\n"
48207"static float\n"
48208" _TG_ATTRS\n"
48209" __tg_ceil(float __x) {return ceilf(__x);}\n"
48210"\n"
48211"static double\n"
48212" _TG_ATTRS\n"
48213" __tg_ceil(double __x) {return ceil(__x);}\n"
48214"\n"
48215"static long double\n"
48216" _TG_ATTRS\n"
48217" __tg_ceil(long double __x) {return ceill(__x);}\n"
48218"\n"
48219"#undef ceil\n"
48220"#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x))\n"
48221"\n"
48222"// copysign\n"
48223"\n"
48224"static float\n"
48225" _TG_ATTRS\n"
48226" __tg_copysign(float __x, float __y) {return copysignf(__x, __y);}\n"
48227"\n"
48228"static double\n"
48229" _TG_ATTRS\n"
48230" __tg_copysign(double __x, double __y) {return copysign(__x, __y);}\n"
48231"\n"
48232"static long double\n"
48233" _TG_ATTRS\n"
48234" __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);}\n"
48235"\n"
48236"#undef copysign\n"
48237"#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \\\n"
48238" __tg_promote2((__x), (__y))(__y))\n"
48239"\n"
48240"// erf\n"
48241"\n"
48242"static float\n"
48243" _TG_ATTRS\n"
48244" __tg_erf(float __x) {return erff(__x);}\n"
48245"\n"
48246"static double\n"
48247" _TG_ATTRS\n"
48248" __tg_erf(double __x) {return erf(__x);}\n"
48249"\n"
48250"static long double\n"
48251" _TG_ATTRS\n"
48252" __tg_erf(long double __x) {return erfl(__x);}\n"
48253"\n"
48254"#undef erf\n"
48255"#define erf(__x) __tg_erf(__tg_promote1((__x))(__x))\n"
48256"\n"
48257"// erfc\n"
48258"\n"
48259"static float\n"
48260" _TG_ATTRS\n"
48261" __tg_erfc(float __x) {return erfcf(__x);}\n"
48262"\n"
48263"static double\n"
48264" _TG_ATTRS\n"
48265" __tg_erfc(double __x) {return erfc(__x);}\n"
48266"\n"
48267"static long double\n"
48268" _TG_ATTRS\n"
48269" __tg_erfc(long double __x) {return erfcl(__x);}\n"
48270"\n"
48271"#undef erfc\n"
48272"#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x))\n"
48273"\n"
48274"// exp2\n"
48275"\n"
48276"static float\n"
48277" _TG_ATTRS\n"
48278" __tg_exp2(float __x) {return exp2f(__x);}\n"
48279"\n"
48280"static double\n"
48281" _TG_ATTRS\n"
48282" __tg_exp2(double __x) {return exp2(__x);}\n"
48283"\n"
48284"static long double\n"
48285" _TG_ATTRS\n"
48286" __tg_exp2(long double __x) {return exp2l(__x);}\n"
48287"\n"
48288"#undef exp2\n"
48289"#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x))\n"
48290"\n"
48291"// expm1\n"
48292"\n"
48293"static float\n"
48294" _TG_ATTRS\n"
48295" __tg_expm1(float __x) {return expm1f(__x);}\n"
48296"\n"
48297"static double\n"
48298" _TG_ATTRS\n"
48299" __tg_expm1(double __x) {return expm1(__x);}\n"
48300"\n"
48301"static long double\n"
48302" _TG_ATTRS\n"
48303" __tg_expm1(long double __x) {return expm1l(__x);}\n"
48304"\n"
48305"#undef expm1\n"
48306"#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x))\n"
48307"\n"
48308"// fdim\n"
48309"\n"
48310"static float\n"
48311" _TG_ATTRS\n"
48312" __tg_fdim(float __x, float __y) {return fdimf(__x, __y);}\n"
48313"\n"
48314"static double\n"
48315" _TG_ATTRS\n"
48316" __tg_fdim(double __x, double __y) {return fdim(__x, __y);}\n"
48317"\n"
48318"static long double\n"
48319" _TG_ATTRS\n"
48320" __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);}\n"
48321"\n"
48322"#undef fdim\n"
48323"#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \\\n"
48324" __tg_promote2((__x), (__y))(__y))\n"
48325"\n"
48326"// floor\n"
48327"\n"
48328"static float\n"
48329" _TG_ATTRS\n"
48330" __tg_floor(float __x) {return floorf(__x);}\n"
48331"\n"
48332"static double\n"
48333" _TG_ATTRS\n"
48334" __tg_floor(double __x) {return floor(__x);}\n"
48335"\n"
48336"static long double\n"
48337" _TG_ATTRS\n"
48338" __tg_floor(long double __x) {return floorl(__x);}\n"
48339"\n"
48340"#undef floor\n"
48341"#define floor(__x) __tg_floor(__tg_promote1((__x))(__x))\n"
48342"\n"
48343"// fma\n"
48344"\n"
48345"static float\n"
48346" _TG_ATTRS\n"
48347" __tg_fma(float __x, float __y, float __z)\n"
48348" {return fmaf(__x, __y, __z);}\n"
48349"\n"
48350"static double\n"
48351" _TG_ATTRS\n"
48352" __tg_fma(double __x, double __y, double __z)\n"
48353" {return fma(__x, __y, __z);}\n"
48354"\n"
48355"static long double\n"
48356" _TG_ATTRS\n"
48357" __tg_fma(long double __x,long double __y, long double __z)\n"
48358" {return fmal(__x, __y, __z);}\n"
48359"\n"
48360"#undef fma\n"
48361"#define fma(__x, __y, __z) \\\n"
48362" __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \\\n"
48363" __tg_promote3((__x), (__y), (__z))(__y), \\\n"
48364" __tg_promote3((__x), (__y), (__z))(__z))\n"
48365"\n"
48366"// fmax\n"
48367"\n"
48368"static float\n"
48369" _TG_ATTRS\n"
48370" __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);}\n"
48371"\n"
48372"static double\n"
48373" _TG_ATTRS\n"
48374" __tg_fmax(double __x, double __y) {return fmax(__x, __y);}\n"
48375"\n"
48376"static long double\n"
48377" _TG_ATTRS\n"
48378" __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);}\n"
48379"\n"
48380"#undef fmax\n"
48381"#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \\\n"
48382" __tg_promote2((__x), (__y))(__y))\n"
48383"\n"
48384"// fmin\n"
48385"\n"
48386"static float\n"
48387" _TG_ATTRS\n"
48388" __tg_fmin(float __x, float __y) {return fminf(__x, __y);}\n"
48389"\n"
48390"static double\n"
48391" _TG_ATTRS\n"
48392" __tg_fmin(double __x, double __y) {return fmin(__x, __y);}\n"
48393"\n"
48394"static long double\n"
48395" _TG_ATTRS\n"
48396" __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);}\n"
48397"\n"
48398"#undef fmin\n"
48399"#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \\\n"
48400" __tg_promote2((__x), (__y))(__y))\n"
48401"\n"
48402"// fmod\n"
48403"\n"
48404"static float\n"
48405" _TG_ATTRS\n"
48406" __tg_fmod(float __x, float __y) {return fmodf(__x, __y);}\n"
48407"\n"
48408"static double\n"
48409" _TG_ATTRS\n"
48410" __tg_fmod(double __x, double __y) {return fmod(__x, __y);}\n"
48411"\n"
48412"static long double\n"
48413" _TG_ATTRS\n"
48414" __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);}\n"
48415"\n"
48416"#undef fmod\n"
48417"#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \\\n"
48418" __tg_promote2((__x), (__y))(__y))\n"
48419"\n"
48420"// frexp\n"
48421"\n"
48422"static float\n"
48423" _TG_ATTRS\n"
48424" __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);}\n"
48425"\n"
48426"static double\n"
48427" _TG_ATTRS\n"
48428" __tg_frexp(double __x, int* __y) {return frexp(__x, __y);}\n"
48429"\n"
48430"static long double\n"
48431" _TG_ATTRS\n"
48432" __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);}\n"
48433"\n"
48434"#undef frexp\n"
48435"#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y)\n"
48436"\n"
48437"// hypot\n"
48438"\n"
48439"static float\n"
48440" _TG_ATTRS\n"
48441" __tg_hypot(float __x, float __y) {return hypotf(__x, __y);}\n"
48442"\n"
48443"static double\n"
48444" _TG_ATTRS\n"
48445" __tg_hypot(double __x, double __y) {return hypot(__x, __y);}\n"
48446"\n"
48447"static long double\n"
48448" _TG_ATTRS\n"
48449" __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);}\n"
48450"\n"
48451"#undef hypot\n"
48452"#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \\\n"
48453" __tg_promote2((__x), (__y))(__y))\n"
48454"\n"
48455"// ilogb\n"
48456"\n"
48457"static int\n"
48458" _TG_ATTRS\n"
48459" __tg_ilogb(float __x) {return ilogbf(__x);}\n"
48460"\n"
48461"static int\n"
48462" _TG_ATTRS\n"
48463" __tg_ilogb(double __x) {return ilogb(__x);}\n"
48464"\n"
48465"static int\n"
48466" _TG_ATTRS\n"
48467" __tg_ilogb(long double __x) {return ilogbl(__x);}\n"
48468"\n"
48469"#undef ilogb\n"
48470"#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x))\n"
48471"\n"
48472"// ldexp\n"
48473"\n"
48474"static float\n"
48475" _TG_ATTRS\n"
48476" __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);}\n"
48477"\n"
48478"static double\n"
48479" _TG_ATTRS\n"
48480" __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);}\n"
48481"\n"
48482"static long double\n"
48483" _TG_ATTRS\n"
48484" __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);}\n"
48485"\n"
48486"#undef ldexp\n"
48487"#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y)\n"
48488"\n"
48489"// lgamma\n"
48490"\n"
48491"static float\n"
48492" _TG_ATTRS\n"
48493" __tg_lgamma(float __x) {return lgammaf(__x);}\n"
48494"\n"
48495"static double\n"
48496" _TG_ATTRS\n"
48497" __tg_lgamma(double __x) {return lgamma(__x);}\n"
48498"\n"
48499"static long double\n"
48500" _TG_ATTRS\n"
48501" __tg_lgamma(long double __x) {return lgammal(__x);}\n"
48502"\n"
48503"#undef lgamma\n"
48504"#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x))\n"
48505"\n"
48506"// llrint\n"
48507"\n"
48508"static long long\n"
48509" _TG_ATTRS\n"
48510" __tg_llrint(float __x) {return llrintf(__x);}\n"
48511"\n"
48512"static long long\n"
48513" _TG_ATTRS\n"
48514" __tg_llrint(double __x) {return llrint(__x);}\n"
48515"\n"
48516"static long long\n"
48517" _TG_ATTRS\n"
48518" __tg_llrint(long double __x) {return llrintl(__x);}\n"
48519"\n"
48520"#undef llrint\n"
48521"#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x))\n"
48522"\n"
48523"// llround\n"
48524"\n"
48525"static long long\n"
48526" _TG_ATTRS\n"
48527" __tg_llround(float __x) {return llroundf(__x);}\n"
48528"\n"
48529"static long long\n"
48530" _TG_ATTRS\n"
48531" __tg_llround(double __x) {return llround(__x);}\n"
48532"\n"
48533"static long long\n"
48534" _TG_ATTRS\n"
48535" __tg_llround(long double __x) {return llroundl(__x);}\n"
48536"\n"
48537"#undef llround\n"
48538"#define llround(__x) __tg_llround(__tg_promote1((__x))(__x))\n"
48539"\n"
48540"// log10\n"
48541"\n"
48542"static float\n"
48543" _TG_ATTRS\n"
48544" __tg_log10(float __x) {return log10f(__x);}\n"
48545"\n"
48546"static double\n"
48547" _TG_ATTRS\n"
48548" __tg_log10(double __x) {return log10(__x);}\n"
48549"\n"
48550"static long double\n"
48551" _TG_ATTRS\n"
48552" __tg_log10(long double __x) {return log10l(__x);}\n"
48553"\n"
48554"#undef log10\n"
48555"#define log10(__x) __tg_log10(__tg_promote1((__x))(__x))\n"
48556"\n"
48557"// log1p\n"
48558"\n"
48559"static float\n"
48560" _TG_ATTRS\n"
48561" __tg_log1p(float __x) {return log1pf(__x);}\n"
48562"\n"
48563"static double\n"
48564" _TG_ATTRS\n"
48565" __tg_log1p(double __x) {return log1p(__x);}\n"
48566"\n"
48567"static long double\n"
48568" _TG_ATTRS\n"
48569" __tg_log1p(long double __x) {return log1pl(__x);}\n"
48570"\n"
48571"#undef log1p\n"
48572"#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x))\n"
48573"\n"
48574"// log2\n"
48575"\n"
48576"static float\n"
48577" _TG_ATTRS\n"
48578" __tg_log2(float __x) {return log2f(__x);}\n"
48579"\n"
48580"static double\n"
48581" _TG_ATTRS\n"
48582" __tg_log2(double __x) {return log2(__x);}\n"
48583"\n"
48584"static long double\n"
48585" _TG_ATTRS\n"
48586" __tg_log2(long double __x) {return log2l(__x);}\n"
48587"\n"
48588"#undef log2\n"
48589"#define log2(__x) __tg_log2(__tg_promote1((__x))(__x))\n"
48590"\n"
48591"// logb\n"
48592"\n"
48593"static float\n"
48594" _TG_ATTRS\n"
48595" __tg_logb(float __x) {return logbf(__x);}\n"
48596"\n"
48597"static double\n"
48598" _TG_ATTRS\n"
48599" __tg_logb(double __x) {return logb(__x);}\n"
48600"\n"
48601"static long double\n"
48602" _TG_ATTRS\n"
48603" __tg_logb(long double __x) {return logbl(__x);}\n"
48604"\n"
48605"#undef logb\n"
48606"#define logb(__x) __tg_logb(__tg_promote1((__x))(__x))\n"
48607"\n"
48608"// lrint\n"
48609"\n"
48610"static long\n"
48611" _TG_ATTRS\n"
48612" __tg_lrint(float __x) {return lrintf(__x);}\n"
48613"\n"
48614"static long\n"
48615" _TG_ATTRS\n"
48616" __tg_lrint(double __x) {return lrint(__x);}\n"
48617"\n"
48618"static long\n"
48619" _TG_ATTRS\n"
48620" __tg_lrint(long double __x) {return lrintl(__x);}\n"
48621"\n"
48622"#undef lrint\n"
48623"#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x))\n"
48624"\n"
48625"// lround\n"
48626"\n"
48627"static long\n"
48628" _TG_ATTRS\n"
48629" __tg_lround(float __x) {return lroundf(__x);}\n"
48630"\n"
48631"static long\n"
48632" _TG_ATTRS\n"
48633" __tg_lround(double __x) {return lround(__x);}\n"
48634"\n"
48635"static long\n"
48636" _TG_ATTRS\n"
48637" __tg_lround(long double __x) {return lroundl(__x);}\n"
48638"\n"
48639"#undef lround\n"
48640"#define lround(__x) __tg_lround(__tg_promote1((__x))(__x))\n"
48641"\n"
48642"// nearbyint\n"
48643"\n"
48644"static float\n"
48645" _TG_ATTRS\n"
48646" __tg_nearbyint(float __x) {return nearbyintf(__x);}\n"
48647"\n"
48648"static double\n"
48649" _TG_ATTRS\n"
48650" __tg_nearbyint(double __x) {return nearbyint(__x);}\n"
48651"\n"
48652"static long double\n"
48653" _TG_ATTRS\n"
48654" __tg_nearbyint(long double __x) {return nearbyintl(__x);}\n"
48655"\n"
48656"#undef nearbyint\n"
48657"#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x))\n"
48658"\n"
48659"// nextafter\n"
48660"\n"
48661"static float\n"
48662" _TG_ATTRS\n"
48663" __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);}\n"
48664"\n"
48665"static double\n"
48666" _TG_ATTRS\n"
48667" __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);}\n"
48668"\n"
48669"static long double\n"
48670" _TG_ATTRS\n"
48671" __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);}\n"
48672"\n"
48673"#undef nextafter\n"
48674"#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \\\n"
48675" __tg_promote2((__x), (__y))(__y))\n"
48676"\n"
48677"// nexttoward\n"
48678"\n"
48679"static float\n"
48680" _TG_ATTRS\n"
48681" __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);}\n"
48682"\n"
48683"static double\n"
48684" _TG_ATTRS\n"
48685" __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);}\n"
48686"\n"
48687"static long double\n"
48688" _TG_ATTRS\n"
48689" __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);}\n"
48690"\n"
48691"#undef nexttoward\n"
48692"#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y))\n"
48693"\n"
48694"// remainder\n"
48695"\n"
48696"static float\n"
48697" _TG_ATTRS\n"
48698" __tg_remainder(float __x, float __y) {return remainderf(__x, __y);}\n"
48699"\n"
48700"static double\n"
48701" _TG_ATTRS\n"
48702" __tg_remainder(double __x, double __y) {return remainder(__x, __y);}\n"
48703"\n"
48704"static long double\n"
48705" _TG_ATTRS\n"
48706" __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);}\n"
48707"\n"
48708"#undef remainder\n"
48709"#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \\\n"
48710" __tg_promote2((__x), (__y))(__y))\n"
48711"\n"
48712"// remquo\n"
48713"\n"
48714"static float\n"
48715" _TG_ATTRS\n"
48716" __tg_remquo(float __x, float __y, int* __z)\n"
48717" {return remquof(__x, __y, __z);}\n"
48718"\n"
48719"static double\n"
48720" _TG_ATTRS\n"
48721" __tg_remquo(double __x, double __y, int* __z)\n"
48722" {return remquo(__x, __y, __z);}\n"
48723"\n"
48724"static long double\n"
48725" _TG_ATTRS\n"
48726" __tg_remquo(long double __x,long double __y, int* __z)\n"
48727" {return remquol(__x, __y, __z);}\n"
48728"\n"
48729"#undef remquo\n"
48730"#define remquo(__x, __y, __z) \\\n"
48731" __tg_remquo(__tg_promote2((__x), (__y))(__x), \\\n"
48732" __tg_promote2((__x), (__y))(__y), \\\n"
48733" (__z))\n"
48734"\n"
48735"// rint\n"
48736"\n"
48737"static float\n"
48738" _TG_ATTRS\n"
48739" __tg_rint(float __x) {return rintf(__x);}\n"
48740"\n"
48741"static double\n"
48742" _TG_ATTRS\n"
48743" __tg_rint(double __x) {return rint(__x);}\n"
48744"\n"
48745"static long double\n"
48746" _TG_ATTRS\n"
48747" __tg_rint(long double __x) {return rintl(__x);}\n"
48748"\n"
48749"#undef rint\n"
48750"#define rint(__x) __tg_rint(__tg_promote1((__x))(__x))\n"
48751"\n"
48752"// round\n"
48753"\n"
48754"static float\n"
48755" _TG_ATTRS\n"
48756" __tg_round(float __x) {return roundf(__x);}\n"
48757"\n"
48758"static double\n"
48759" _TG_ATTRS\n"
48760" __tg_round(double __x) {return round(__x);}\n"
48761"\n"
48762"static long double\n"
48763" _TG_ATTRS\n"
48764" __tg_round(long double __x) {return roundl(__x);}\n"
48765"\n"
48766"#undef round\n"
48767"#define round(__x) __tg_round(__tg_promote1((__x))(__x))\n"
48768"\n"
48769"// scalbn\n"
48770"\n"
48771"static float\n"
48772" _TG_ATTRS\n"
48773" __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);}\n"
48774"\n"
48775"static double\n"
48776" _TG_ATTRS\n"
48777" __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);}\n"
48778"\n"
48779"static long double\n"
48780" _TG_ATTRS\n"
48781" __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);}\n"
48782"\n"
48783"#undef scalbn\n"
48784"#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y)\n"
48785"\n"
48786"// scalbln\n"
48787"\n"
48788"static float\n"
48789" _TG_ATTRS\n"
48790" __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);}\n"
48791"\n"
48792"static double\n"
48793" _TG_ATTRS\n"
48794" __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);}\n"
48795"\n"
48796"static long double\n"
48797" _TG_ATTRS\n"
48798" __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);}\n"
48799"\n"
48800"#undef scalbln\n"
48801"#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y)\n"
48802"\n"
48803"// tgamma\n"
48804"\n"
48805"static float\n"
48806" _TG_ATTRS\n"
48807" __tg_tgamma(float __x) {return tgammaf(__x);}\n"
48808"\n"
48809"static double\n"
48810" _TG_ATTRS\n"
48811" __tg_tgamma(double __x) {return tgamma(__x);}\n"
48812"\n"
48813"static long double\n"
48814" _TG_ATTRS\n"
48815" __tg_tgamma(long double __x) {return tgammal(__x);}\n"
48816"\n"
48817"#undef tgamma\n"
48818"#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x))\n"
48819"\n"
48820"// trunc\n"
48821"\n"
48822"static float\n"
48823" _TG_ATTRS\n"
48824" __tg_trunc(float __x) {return truncf(__x);}\n"
48825"\n"
48826"static double\n"
48827" _TG_ATTRS\n"
48828" __tg_trunc(double __x) {return trunc(__x);}\n"
48829"\n"
48830"static long double\n"
48831" _TG_ATTRS\n"
48832" __tg_trunc(long double __x) {return truncl(__x);}\n"
48833"\n"
48834"#undef trunc\n"
48835"#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x))\n"
48836"\n"
48837"// carg\n"
48838"\n"
48839"static float\n"
48840" _TG_ATTRS\n"
48841" __tg_carg(float __x) {return atan2f(0.F, __x);}\n"
48842"\n"
48843"static double\n"
48844" _TG_ATTRS\n"
48845" __tg_carg(double __x) {return atan2(0., __x);}\n"
48846"\n"
48847"static long double\n"
48848" _TG_ATTRS\n"
48849" __tg_carg(long double __x) {return atan2l(0.L, __x);}\n"
48850"\n"
48851"static float\n"
48852" _TG_ATTRS\n"
48853" __tg_carg(float _Complex __x) {return cargf(__x);}\n"
48854"\n"
48855"static double\n"
48856" _TG_ATTRS\n"
48857" __tg_carg(double _Complex __x) {return carg(__x);}\n"
48858"\n"
48859"static long double\n"
48860" _TG_ATTRS\n"
48861" __tg_carg(long double _Complex __x) {return cargl(__x);}\n"
48862"\n"
48863"#undef carg\n"
48864"#define carg(__x) __tg_carg(__tg_promote1((__x))(__x))\n"
48865"\n"
48866"// cimag\n"
48867"\n"
48868"static float\n"
48869" _TG_ATTRS\n"
48870" __tg_cimag(float __x) {return 0;}\n"
48871"\n"
48872"static double\n"
48873" _TG_ATTRS\n"
48874" __tg_cimag(double __x) {return 0;}\n"
48875"\n"
48876"static long double\n"
48877" _TG_ATTRS\n"
48878" __tg_cimag(long double __x) {return 0;}\n"
48879"\n"
48880"static float\n"
48881" _TG_ATTRS\n"
48882" __tg_cimag(float _Complex __x) {return cimagf(__x);}\n"
48883"\n"
48884"static double\n"
48885" _TG_ATTRS\n"
48886" __tg_cimag(double _Complex __x) {return cimag(__x);}\n"
48887"\n"
48888"static long double\n"
48889" _TG_ATTRS\n"
48890" __tg_cimag(long double _Complex __x) {return cimagl(__x);}\n"
48891"\n"
48892"#undef cimag\n"
48893"#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x))\n"
48894"\n"
48895"// conj\n"
48896"\n"
48897"static float _Complex\n"
48898" _TG_ATTRS\n"
48899" __tg_conj(float __x) {return __x;}\n"
48900"\n"
48901"static double _Complex\n"
48902" _TG_ATTRS\n"
48903" __tg_conj(double __x) {return __x;}\n"
48904"\n"
48905"static long double _Complex\n"
48906" _TG_ATTRS\n"
48907" __tg_conj(long double __x) {return __x;}\n"
48908"\n"
48909"static float _Complex\n"
48910" _TG_ATTRS\n"
48911" __tg_conj(float _Complex __x) {return conjf(__x);}\n"
48912"\n"
48913"static double _Complex\n"
48914" _TG_ATTRS\n"
48915" __tg_conj(double _Complex __x) {return conj(__x);}\n"
48916"\n"
48917"static long double _Complex\n"
48918" _TG_ATTRS\n"
48919" __tg_conj(long double _Complex __x) {return conjl(__x);}\n"
48920"\n"
48921"#undef conj\n"
48922"#define conj(__x) __tg_conj(__tg_promote1((__x))(__x))\n"
48923"\n"
48924"// cproj\n"
48925"\n"
48926"static float _Complex\n"
48927" _TG_ATTRS\n"
48928" __tg_cproj(float __x) {return cprojf(__x);}\n"
48929"\n"
48930"static double _Complex\n"
48931" _TG_ATTRS\n"
48932" __tg_cproj(double __x) {return cproj(__x);}\n"
48933"\n"
48934"static long double _Complex\n"
48935" _TG_ATTRS\n"
48936" __tg_cproj(long double __x) {return cprojl(__x);}\n"
48937"\n"
48938"static float _Complex\n"
48939" _TG_ATTRS\n"
48940" __tg_cproj(float _Complex __x) {return cprojf(__x);}\n"
48941"\n"
48942"static double _Complex\n"
48943" _TG_ATTRS\n"
48944" __tg_cproj(double _Complex __x) {return cproj(__x);}\n"
48945"\n"
48946"static long double _Complex\n"
48947" _TG_ATTRS\n"
48948" __tg_cproj(long double _Complex __x) {return cprojl(__x);}\n"
48949"\n"
48950"#undef cproj\n"
48951"#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x))\n"
48952"\n"
48953"// creal\n"
48954"\n"
48955"static float\n"
48956" _TG_ATTRS\n"
48957" __tg_creal(float __x) {return __x;}\n"
48958"\n"
48959"static double\n"
48960" _TG_ATTRS\n"
48961" __tg_creal(double __x) {return __x;}\n"
48962"\n"
48963"static long double\n"
48964" _TG_ATTRS\n"
48965" __tg_creal(long double __x) {return __x;}\n"
48966"\n"
48967"static float\n"
48968" _TG_ATTRS\n"
48969" __tg_creal(float _Complex __x) {return crealf(__x);}\n"
48970"\n"
48971"static double\n"
48972" _TG_ATTRS\n"
48973" __tg_creal(double _Complex __x) {return creal(__x);}\n"
48974"\n"
48975"static long double\n"
48976" _TG_ATTRS\n"
48977" __tg_creal(long double _Complex __x) {return creall(__x);}\n"
48978"\n"
48979"#undef creal\n"
48980"#define creal(__x) __tg_creal(__tg_promote1((__x))(__x))\n"
48981"\n"
48982"#undef _TG_ATTRSp\n"
48983"#undef _TG_ATTRS\n"
48984"\n"
48985"#endif /* __cplusplus */\n"
48986"#endif /* __has_include_next */\n"
48987"#endif /* __CLANG_TGMATH_H */\n"
48988"" } ,
48989 { "/builtins/tmmintrin.h" , "/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===\n"
48990" *\n"
48991" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
48992" * of this software and associated documentation files (the \"Software\"), to deal\n"
48993" * in the Software without restriction, including without limitation the rights\n"
48994" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
48995" * copies of the Software, and to permit persons to whom the Software is\n"
48996" * furnished to do so, subject to the following conditions:\n"
48997" *\n"
48998" * The above copyright notice and this permission notice shall be included in\n"
48999" * all copies or substantial portions of the Software.\n"
49000" *\n"
49001" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49002" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49003" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49004" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49005" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49006" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49007" * THE SOFTWARE.\n"
49008" *\n"
49009" *===-----------------------------------------------------------------------===\n"
49010" */\n"
49011"\n"
49012"#ifndef __TMMINTRIN_H\n"
49013"#define __TMMINTRIN_H\n"
49014"\n"
49015"#include <pmmintrin.h>\n"
49016"\n"
49017"/* Define the default attributes for the functions in this file. */\n"
49018"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"ssse3\"), __min_vector_width__(64)))\n"
49019"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,ssse3\"), __min_vector_width__(64)))\n"
49020"\n"
49021"/// Computes the absolute value of each of the packed 8-bit signed\n"
49022"/// integers in the source operand and stores the 8-bit unsigned integer\n"
49023"/// results in the destination.\n"
49024"///\n"
49025"/// \\headerfile <x86intrin.h>\n"
49026"///\n"
49027"/// This intrinsic corresponds to the \\c PABSB instruction.\n"
49028"///\n"
49029"/// \\param __a\n"
49030"/// A 64-bit vector of [8 x i8].\n"
49031"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
49032"/// elements in the operand.\n"
49033"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49034"_mm_abs_pi8(__m64 __a)\n"
49035"{\n"
49036" return (__m64)__builtin_ia32_pabsb((__v8qi)__a);\n"
49037"}\n"
49038"\n"
49039"/// Computes the absolute value of each of the packed 8-bit signed\n"
49040"/// integers in the source operand and stores the 8-bit unsigned integer\n"
49041"/// results in the destination.\n"
49042"///\n"
49043"/// \\headerfile <x86intrin.h>\n"
49044"///\n"
49045"/// This intrinsic corresponds to the \\c VPABSB instruction.\n"
49046"///\n"
49047"/// \\param __a\n"
49048"/// A 128-bit vector of [16 x i8].\n"
49049"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
49050"/// elements in the operand.\n"
49051"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49052"_mm_abs_epi8(__m128i __a)\n"
49053"{\n"
49054" return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);\n"
49055"}\n"
49056"\n"
49057"/// Computes the absolute value of each of the packed 16-bit signed\n"
49058"/// integers in the source operand and stores the 16-bit unsigned integer\n"
49059"/// results in the destination.\n"
49060"///\n"
49061"/// \\headerfile <x86intrin.h>\n"
49062"///\n"
49063"/// This intrinsic corresponds to the \\c PABSW instruction.\n"
49064"///\n"
49065"/// \\param __a\n"
49066"/// A 64-bit vector of [4 x i16].\n"
49067"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
49068"/// elements in the operand.\n"
49069"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49070"_mm_abs_pi16(__m64 __a)\n"
49071"{\n"
49072" return (__m64)__builtin_ia32_pabsw((__v4hi)__a);\n"
49073"}\n"
49074"\n"
49075"/// Computes the absolute value of each of the packed 16-bit signed\n"
49076"/// integers in the source operand and stores the 16-bit unsigned integer\n"
49077"/// results in the destination.\n"
49078"///\n"
49079"/// \\headerfile <x86intrin.h>\n"
49080"///\n"
49081"/// This intrinsic corresponds to the \\c VPABSW instruction.\n"
49082"///\n"
49083"/// \\param __a\n"
49084"/// A 128-bit vector of [8 x i16].\n"
49085"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
49086"/// elements in the operand.\n"
49087"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49088"_mm_abs_epi16(__m128i __a)\n"
49089"{\n"
49090" return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);\n"
49091"}\n"
49092"\n"
49093"/// Computes the absolute value of each of the packed 32-bit signed\n"
49094"/// integers in the source operand and stores the 32-bit unsigned integer\n"
49095"/// results in the destination.\n"
49096"///\n"
49097"/// \\headerfile <x86intrin.h>\n"
49098"///\n"
49099"/// This intrinsic corresponds to the \\c PABSD instruction.\n"
49100"///\n"
49101"/// \\param __a\n"
49102"/// A 64-bit vector of [2 x i32].\n"
49103"/// \\returns A 64-bit integer vector containing the absolute values of the\n"
49104"/// elements in the operand.\n"
49105"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49106"_mm_abs_pi32(__m64 __a)\n"
49107"{\n"
49108" return (__m64)__builtin_ia32_pabsd((__v2si)__a);\n"
49109"}\n"
49110"\n"
49111"/// Computes the absolute value of each of the packed 32-bit signed\n"
49112"/// integers in the source operand and stores the 32-bit unsigned integer\n"
49113"/// results in the destination.\n"
49114"///\n"
49115"/// \\headerfile <x86intrin.h>\n"
49116"///\n"
49117"/// This intrinsic corresponds to the \\c VPABSD instruction.\n"
49118"///\n"
49119"/// \\param __a\n"
49120"/// A 128-bit vector of [4 x i32].\n"
49121"/// \\returns A 128-bit integer vector containing the absolute values of the\n"
49122"/// elements in the operand.\n"
49123"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49124"_mm_abs_epi32(__m128i __a)\n"
49125"{\n"
49126" return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);\n"
49127"}\n"
49128"\n"
49129"/// Concatenates the two 128-bit integer vector operands, and\n"
49130"/// right-shifts the result by the number of bytes specified in the immediate\n"
49131"/// operand.\n"
49132"///\n"
49133"/// \\headerfile <x86intrin.h>\n"
49134"///\n"
49135"/// \\code\n"
49136"/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);\n"
49137"/// \\endcode\n"
49138"///\n"
49139"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
49140"///\n"
49141"/// \\param a\n"
49142"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
49143"/// \\param b\n"
49144"/// A 128-bit vector of [16 x i8] containing one of the source operands.\n"
49145"/// \\param n\n"
49146"/// An immediate operand specifying how many bytes to right-shift the result.\n"
49147"/// \\returns A 128-bit integer vector containing the concatenated right-shifted\n"
49148"/// value.\n"
49149"#define _mm_alignr_epi8(a, b, n) \\\n"
49150" (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \\\n"
49151" (__v16qi)(__m128i)(b), (n))\n"
49152"\n"
49153"/// Concatenates the two 64-bit integer vector operands, and right-shifts\n"
49154"/// the result by the number of bytes specified in the immediate operand.\n"
49155"///\n"
49156"/// \\headerfile <x86intrin.h>\n"
49157"///\n"
49158"/// \\code\n"
49159"/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);\n"
49160"/// \\endcode\n"
49161"///\n"
49162"/// This intrinsic corresponds to the \\c PALIGNR instruction.\n"
49163"///\n"
49164"/// \\param a\n"
49165"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
49166"/// \\param b\n"
49167"/// A 64-bit vector of [8 x i8] containing one of the source operands.\n"
49168"/// \\param n\n"
49169"/// An immediate operand specifying how many bytes to right-shift the result.\n"
49170"/// \\returns A 64-bit integer vector containing the concatenated right-shifted\n"
49171"/// value.\n"
49172"#define _mm_alignr_pi8(a, b, n) \\\n"
49173" (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))\n"
49174"\n"
49175"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49176"/// 128-bit vectors of [8 x i16].\n"
49177"///\n"
49178"/// \\headerfile <x86intrin.h>\n"
49179"///\n"
49180"/// This intrinsic corresponds to the \\c VPHADDW instruction.\n"
49181"///\n"
49182"/// \\param __a\n"
49183"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49184"/// horizontal sums of the values are stored in the lower bits of the\n"
49185"/// destination.\n"
49186"/// \\param __b\n"
49187"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49188"/// horizontal sums of the values are stored in the upper bits of the\n"
49189"/// destination.\n"
49190"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal sums of\n"
49191"/// both operands.\n"
49192"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49193"_mm_hadd_epi16(__m128i __a, __m128i __b)\n"
49194"{\n"
49195" return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);\n"
49196"}\n"
49197"\n"
49198"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49199"/// 128-bit vectors of [4 x i32].\n"
49200"///\n"
49201"/// \\headerfile <x86intrin.h>\n"
49202"///\n"
49203"/// This intrinsic corresponds to the \\c VPHADDD instruction.\n"
49204"///\n"
49205"/// \\param __a\n"
49206"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
49207"/// horizontal sums of the values are stored in the lower bits of the\n"
49208"/// destination.\n"
49209"/// \\param __b\n"
49210"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
49211"/// horizontal sums of the values are stored in the upper bits of the\n"
49212"/// destination.\n"
49213"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal sums of\n"
49214"/// both operands.\n"
49215"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49216"_mm_hadd_epi32(__m128i __a, __m128i __b)\n"
49217"{\n"
49218" return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);\n"
49219"}\n"
49220"\n"
49221"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49222"/// 64-bit vectors of [4 x i16].\n"
49223"///\n"
49224"/// \\headerfile <x86intrin.h>\n"
49225"///\n"
49226"/// This intrinsic corresponds to the \\c PHADDW instruction.\n"
49227"///\n"
49228"/// \\param __a\n"
49229"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49230"/// horizontal sums of the values are stored in the lower bits of the\n"
49231"/// destination.\n"
49232"/// \\param __b\n"
49233"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49234"/// horizontal sums of the values are stored in the upper bits of the\n"
49235"/// destination.\n"
49236"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal sums of both\n"
49237"/// operands.\n"
49238"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49239"_mm_hadd_pi16(__m64 __a, __m64 __b)\n"
49240"{\n"
49241" return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);\n"
49242"}\n"
49243"\n"
49244"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49245"/// 64-bit vectors of [2 x i32].\n"
49246"///\n"
49247"/// \\headerfile <x86intrin.h>\n"
49248"///\n"
49249"/// This intrinsic corresponds to the \\c PHADDD instruction.\n"
49250"///\n"
49251"/// \\param __a\n"
49252"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
49253"/// horizontal sums of the values are stored in the lower bits of the\n"
49254"/// destination.\n"
49255"/// \\param __b\n"
49256"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
49257"/// horizontal sums of the values are stored in the upper bits of the\n"
49258"/// destination.\n"
49259"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal sums of both\n"
49260"/// operands.\n"
49261"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49262"_mm_hadd_pi32(__m64 __a, __m64 __b)\n"
49263"{\n"
49264" return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);\n"
49265"}\n"
49266"\n"
49267"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49268"/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are\n"
49269"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
49270"/// 0x8000.\n"
49271"///\n"
49272"/// \\headerfile <x86intrin.h>\n"
49273"///\n"
49274"/// This intrinsic corresponds to the \\c VPHADDSW instruction.\n"
49275"///\n"
49276"/// \\param __a\n"
49277"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49278"/// horizontal sums of the values are stored in the lower bits of the\n"
49279"/// destination.\n"
49280"/// \\param __b\n"
49281"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49282"/// horizontal sums of the values are stored in the upper bits of the\n"
49283"/// destination.\n"
49284"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
49285"/// sums of both operands.\n"
49286"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49287"_mm_hadds_epi16(__m128i __a, __m128i __b)\n"
49288"{\n"
49289" return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);\n"
49290"}\n"
49291"\n"
49292"/// Horizontally adds the adjacent pairs of values contained in 2 packed\n"
49293"/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are\n"
49294"/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n"
49295"/// 0x8000.\n"
49296"///\n"
49297"/// \\headerfile <x86intrin.h>\n"
49298"///\n"
49299"/// This intrinsic corresponds to the \\c PHADDSW instruction.\n"
49300"///\n"
49301"/// \\param __a\n"
49302"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49303"/// horizontal sums of the values are stored in the lower bits of the\n"
49304"/// destination.\n"
49305"/// \\param __b\n"
49306"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49307"/// horizontal sums of the values are stored in the upper bits of the\n"
49308"/// destination.\n"
49309"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
49310"/// sums of both operands.\n"
49311"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49312"_mm_hadds_pi16(__m64 __a, __m64 __b)\n"
49313"{\n"
49314" return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);\n"
49315"}\n"
49316"\n"
49317"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49318"/// packed 128-bit vectors of [8 x i16].\n"
49319"///\n"
49320"/// \\headerfile <x86intrin.h>\n"
49321"///\n"
49322"/// This intrinsic corresponds to the \\c VPHSUBW instruction.\n"
49323"///\n"
49324"/// \\param __a\n"
49325"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49326"/// horizontal differences between the values are stored in the lower bits of\n"
49327"/// the destination.\n"
49328"/// \\param __b\n"
49329"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49330"/// horizontal differences between the values are stored in the upper bits of\n"
49331"/// the destination.\n"
49332"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal differences\n"
49333"/// of both operands.\n"
49334"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49335"_mm_hsub_epi16(__m128i __a, __m128i __b)\n"
49336"{\n"
49337" return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);\n"
49338"}\n"
49339"\n"
49340"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49341"/// packed 128-bit vectors of [4 x i32].\n"
49342"///\n"
49343"/// \\headerfile <x86intrin.h>\n"
49344"///\n"
49345"/// This intrinsic corresponds to the \\c VPHSUBD instruction.\n"
49346"///\n"
49347"/// \\param __a\n"
49348"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
49349"/// horizontal differences between the values are stored in the lower bits of\n"
49350"/// the destination.\n"
49351"/// \\param __b\n"
49352"/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n"
49353"/// horizontal differences between the values are stored in the upper bits of\n"
49354"/// the destination.\n"
49355"/// \\returns A 128-bit vector of [4 x i32] containing the horizontal differences\n"
49356"/// of both operands.\n"
49357"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49358"_mm_hsub_epi32(__m128i __a, __m128i __b)\n"
49359"{\n"
49360" return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);\n"
49361"}\n"
49362"\n"
49363"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49364"/// packed 64-bit vectors of [4 x i16].\n"
49365"///\n"
49366"/// \\headerfile <x86intrin.h>\n"
49367"///\n"
49368"/// This intrinsic corresponds to the \\c PHSUBW instruction.\n"
49369"///\n"
49370"/// \\param __a\n"
49371"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49372"/// horizontal differences between the values are stored in the lower bits of\n"
49373"/// the destination.\n"
49374"/// \\param __b\n"
49375"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49376"/// horizontal differences between the values are stored in the upper bits of\n"
49377"/// the destination.\n"
49378"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal differences\n"
49379"/// of both operands.\n"
49380"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49381"_mm_hsub_pi16(__m64 __a, __m64 __b)\n"
49382"{\n"
49383" return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);\n"
49384"}\n"
49385"\n"
49386"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49387"/// packed 64-bit vectors of [2 x i32].\n"
49388"///\n"
49389"/// \\headerfile <x86intrin.h>\n"
49390"///\n"
49391"/// This intrinsic corresponds to the \\c PHSUBD instruction.\n"
49392"///\n"
49393"/// \\param __a\n"
49394"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
49395"/// horizontal differences between the values are stored in the lower bits of\n"
49396"/// the destination.\n"
49397"/// \\param __b\n"
49398"/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n"
49399"/// horizontal differences between the values are stored in the upper bits of\n"
49400"/// the destination.\n"
49401"/// \\returns A 64-bit vector of [2 x i32] containing the horizontal differences\n"
49402"/// of both operands.\n"
49403"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49404"_mm_hsub_pi32(__m64 __a, __m64 __b)\n"
49405"{\n"
49406" return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);\n"
49407"}\n"
49408"\n"
49409"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49410"/// packed 128-bit vectors of [8 x i16]. Positive differences greater than\n"
49411"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
49412"/// saturated to 0x8000.\n"
49413"///\n"
49414"/// \\headerfile <x86intrin.h>\n"
49415"///\n"
49416"/// This intrinsic corresponds to the \\c VPHSUBSW instruction.\n"
49417"///\n"
49418"/// \\param __a\n"
49419"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49420"/// horizontal differences between the values are stored in the lower bits of\n"
49421"/// the destination.\n"
49422"/// \\param __b\n"
49423"/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n"
49424"/// horizontal differences between the values are stored in the upper bits of\n"
49425"/// the destination.\n"
49426"/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n"
49427"/// differences of both operands.\n"
49428"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49429"_mm_hsubs_epi16(__m128i __a, __m128i __b)\n"
49430"{\n"
49431" return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);\n"
49432"}\n"
49433"\n"
49434"/// Horizontally subtracts the adjacent pairs of values contained in 2\n"
49435"/// packed 64-bit vectors of [4 x i16]. Positive differences greater than\n"
49436"/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n"
49437"/// saturated to 0x8000.\n"
49438"///\n"
49439"/// \\headerfile <x86intrin.h>\n"
49440"///\n"
49441"/// This intrinsic corresponds to the \\c PHSUBSW instruction.\n"
49442"///\n"
49443"/// \\param __a\n"
49444"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49445"/// horizontal differences between the values are stored in the lower bits of\n"
49446"/// the destination.\n"
49447"/// \\param __b\n"
49448"/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n"
49449"/// horizontal differences between the values are stored in the upper bits of\n"
49450"/// the destination.\n"
49451"/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n"
49452"/// differences of both operands.\n"
49453"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49454"_mm_hsubs_pi16(__m64 __a, __m64 __b)\n"
49455"{\n"
49456" return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);\n"
49457"}\n"
49458"\n"
49459"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
49460"/// values contained in the first source operand and packed 8-bit signed\n"
49461"/// integer values contained in the second source operand, adds pairs of\n"
49462"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
49463"/// the corresponding bits in the destination.\n"
49464"///\n"
49465"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
49466"/// both operands are multiplied, and the sum of both results is written to\n"
49467"/// bits [15:0] of the destination.\n"
49468"///\n"
49469"/// \\headerfile <x86intrin.h>\n"
49470"///\n"
49471"/// This intrinsic corresponds to the \\c VPMADDUBSW instruction.\n"
49472"///\n"
49473"/// \\param __a\n"
49474"/// A 128-bit integer vector containing the first source operand.\n"
49475"/// \\param __b\n"
49476"/// A 128-bit integer vector containing the second source operand.\n"
49477"/// \\returns A 128-bit integer vector containing the sums of products of both\n"
49478"/// operands: \\n\n"
49479"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
49480"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
49481"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
49482"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7) \\n\n"
49483"/// \\a R4 := (\\a __a8 * \\a __b8) + (\\a __a9 * \\a __b9) \\n\n"
49484"/// \\a R5 := (\\a __a10 * \\a __b10) + (\\a __a11 * \\a __b11) \\n\n"
49485"/// \\a R6 := (\\a __a12 * \\a __b12) + (\\a __a13 * \\a __b13) \\n\n"
49486"/// \\a R7 := (\\a __a14 * \\a __b14) + (\\a __a15 * \\a __b15)\n"
49487"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49488"_mm_maddubs_epi16(__m128i __a, __m128i __b)\n"
49489"{\n"
49490" return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);\n"
49491"}\n"
49492"\n"
49493"/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n"
49494"/// values contained in the first source operand and packed 8-bit signed\n"
49495"/// integer values contained in the second source operand, adds pairs of\n"
49496"/// contiguous products with signed saturation, and writes the 16-bit sums to\n"
49497"/// the corresponding bits in the destination.\n"
49498"///\n"
49499"/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n"
49500"/// both operands are multiplied, and the sum of both results is written to\n"
49501"/// bits [15:0] of the destination.\n"
49502"///\n"
49503"/// \\headerfile <x86intrin.h>\n"
49504"///\n"
49505"/// This intrinsic corresponds to the \\c PMADDUBSW instruction.\n"
49506"///\n"
49507"/// \\param __a\n"
49508"/// A 64-bit integer vector containing the first source operand.\n"
49509"/// \\param __b\n"
49510"/// A 64-bit integer vector containing the second source operand.\n"
49511"/// \\returns A 64-bit integer vector containing the sums of products of both\n"
49512"/// operands: \\n\n"
49513"/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n"
49514"/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n"
49515"/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n"
49516"/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7)\n"
49517"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49518"_mm_maddubs_pi16(__m64 __a, __m64 __b)\n"
49519"{\n"
49520" return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);\n"
49521"}\n"
49522"\n"
49523"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
49524"/// products to the 18 most significant bits by right-shifting, rounds the\n"
49525"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
49526"///\n"
49527"/// \\headerfile <x86intrin.h>\n"
49528"///\n"
49529"/// This intrinsic corresponds to the \\c VPMULHRSW instruction.\n"
49530"///\n"
49531"/// \\param __a\n"
49532"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
49533"/// \\param __b\n"
49534"/// A 128-bit vector of [8 x i16] containing one of the source operands.\n"
49535"/// \\returns A 128-bit vector of [8 x i16] containing the rounded and scaled\n"
49536"/// products of both operands.\n"
49537"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49538"_mm_mulhrs_epi16(__m128i __a, __m128i __b)\n"
49539"{\n"
49540" return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);\n"
49541"}\n"
49542"\n"
49543"/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n"
49544"/// products to the 18 most significant bits by right-shifting, rounds the\n"
49545"/// truncated value by adding 1, and writes bits [16:1] to the destination.\n"
49546"///\n"
49547"/// \\headerfile <x86intrin.h>\n"
49548"///\n"
49549"/// This intrinsic corresponds to the \\c PMULHRSW instruction.\n"
49550"///\n"
49551"/// \\param __a\n"
49552"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
49553"/// \\param __b\n"
49554"/// A 64-bit vector of [4 x i16] containing one of the source operands.\n"
49555"/// \\returns A 64-bit vector of [4 x i16] containing the rounded and scaled\n"
49556"/// products of both operands.\n"
49557"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49558"_mm_mulhrs_pi16(__m64 __a, __m64 __b)\n"
49559"{\n"
49560" return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);\n"
49561"}\n"
49562"\n"
49563"/// Copies the 8-bit integers from a 128-bit integer vector to the\n"
49564"/// destination or clears 8-bit values in the destination, as specified by\n"
49565"/// the second source operand.\n"
49566"///\n"
49567"/// \\headerfile <x86intrin.h>\n"
49568"///\n"
49569"/// This intrinsic corresponds to the \\c VPSHUFB instruction.\n"
49570"///\n"
49571"/// \\param __a\n"
49572"/// A 128-bit integer vector containing the values to be copied.\n"
49573"/// \\param __b\n"
49574"/// A 128-bit integer vector containing control bytes corresponding to\n"
49575"/// positions in the destination:\n"
49576"/// Bit 7: \\n\n"
49577"/// 1: Clear the corresponding byte in the destination. \\n\n"
49578"/// 0: Copy the selected source byte to the corresponding byte in the\n"
49579"/// destination. \\n\n"
49580"/// Bits [6:4] Reserved. \\n\n"
49581"/// Bits [3:0] select the source byte to be copied.\n"
49582"/// \\returns A 128-bit integer vector containing the copied or cleared values.\n"
49583"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49584"_mm_shuffle_epi8(__m128i __a, __m128i __b)\n"
49585"{\n"
49586" return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);\n"
49587"}\n"
49588"\n"
49589"/// Copies the 8-bit integers from a 64-bit integer vector to the\n"
49590"/// destination or clears 8-bit values in the destination, as specified by\n"
49591"/// the second source operand.\n"
49592"///\n"
49593"/// \\headerfile <x86intrin.h>\n"
49594"///\n"
49595"/// This intrinsic corresponds to the \\c PSHUFB instruction.\n"
49596"///\n"
49597"/// \\param __a\n"
49598"/// A 64-bit integer vector containing the values to be copied.\n"
49599"/// \\param __b\n"
49600"/// A 64-bit integer vector containing control bytes corresponding to\n"
49601"/// positions in the destination:\n"
49602"/// Bit 7: \\n\n"
49603"/// 1: Clear the corresponding byte in the destination. \\n\n"
49604"/// 0: Copy the selected source byte to the corresponding byte in the\n"
49605"/// destination. \\n\n"
49606"/// Bits [3:0] select the source byte to be copied.\n"
49607"/// \\returns A 64-bit integer vector containing the copied or cleared values.\n"
49608"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49609"_mm_shuffle_pi8(__m64 __a, __m64 __b)\n"
49610"{\n"
49611" return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);\n"
49612"}\n"
49613"\n"
49614"/// For each 8-bit integer in the first source operand, perform one of\n"
49615"/// the following actions as specified by the second source operand.\n"
49616"///\n"
49617"/// If the byte in the second source is negative, calculate the two's\n"
49618"/// complement of the corresponding byte in the first source, and write that\n"
49619"/// value to the destination. If the byte in the second source is positive,\n"
49620"/// copy the corresponding byte from the first source to the destination. If\n"
49621"/// the byte in the second source is zero, clear the corresponding byte in\n"
49622"/// the destination.\n"
49623"///\n"
49624"/// \\headerfile <x86intrin.h>\n"
49625"///\n"
49626"/// This intrinsic corresponds to the \\c VPSIGNB instruction.\n"
49627"///\n"
49628"/// \\param __a\n"
49629"/// A 128-bit integer vector containing the values to be copied.\n"
49630"/// \\param __b\n"
49631"/// A 128-bit integer vector containing control bytes corresponding to\n"
49632"/// positions in the destination.\n"
49633"/// \\returns A 128-bit integer vector containing the resultant values.\n"
49634"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49635"_mm_sign_epi8(__m128i __a, __m128i __b)\n"
49636"{\n"
49637" return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);\n"
49638"}\n"
49639"\n"
49640"/// For each 16-bit integer in the first source operand, perform one of\n"
49641"/// the following actions as specified by the second source operand.\n"
49642"///\n"
49643"/// If the word in the second source is negative, calculate the two's\n"
49644"/// complement of the corresponding word in the first source, and write that\n"
49645"/// value to the destination. If the word in the second source is positive,\n"
49646"/// copy the corresponding word from the first source to the destination. If\n"
49647"/// the word in the second source is zero, clear the corresponding word in\n"
49648"/// the destination.\n"
49649"///\n"
49650"/// \\headerfile <x86intrin.h>\n"
49651"///\n"
49652"/// This intrinsic corresponds to the \\c VPSIGNW instruction.\n"
49653"///\n"
49654"/// \\param __a\n"
49655"/// A 128-bit integer vector containing the values to be copied.\n"
49656"/// \\param __b\n"
49657"/// A 128-bit integer vector containing control words corresponding to\n"
49658"/// positions in the destination.\n"
49659"/// \\returns A 128-bit integer vector containing the resultant values.\n"
49660"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49661"_mm_sign_epi16(__m128i __a, __m128i __b)\n"
49662"{\n"
49663" return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);\n"
49664"}\n"
49665"\n"
49666"/// For each 32-bit integer in the first source operand, perform one of\n"
49667"/// the following actions as specified by the second source operand.\n"
49668"///\n"
49669"/// If the doubleword in the second source is negative, calculate the two's\n"
49670"/// complement of the corresponding word in the first source, and write that\n"
49671"/// value to the destination. If the doubleword in the second source is\n"
49672"/// positive, copy the corresponding word from the first source to the\n"
49673"/// destination. If the doubleword in the second source is zero, clear the\n"
49674"/// corresponding word in the destination.\n"
49675"///\n"
49676"/// \\headerfile <x86intrin.h>\n"
49677"///\n"
49678"/// This intrinsic corresponds to the \\c VPSIGND instruction.\n"
49679"///\n"
49680"/// \\param __a\n"
49681"/// A 128-bit integer vector containing the values to be copied.\n"
49682"/// \\param __b\n"
49683"/// A 128-bit integer vector containing control doublewords corresponding to\n"
49684"/// positions in the destination.\n"
49685"/// \\returns A 128-bit integer vector containing the resultant values.\n"
49686"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
49687"_mm_sign_epi32(__m128i __a, __m128i __b)\n"
49688"{\n"
49689" return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);\n"
49690"}\n"
49691"\n"
49692"/// For each 8-bit integer in the first source operand, perform one of\n"
49693"/// the following actions as specified by the second source operand.\n"
49694"///\n"
49695"/// If the byte in the second source is negative, calculate the two's\n"
49696"/// complement of the corresponding byte in the first source, and write that\n"
49697"/// value to the destination. If the byte in the second source is positive,\n"
49698"/// copy the corresponding byte from the first source to the destination. If\n"
49699"/// the byte in the second source is zero, clear the corresponding byte in\n"
49700"/// the destination.\n"
49701"///\n"
49702"/// \\headerfile <x86intrin.h>\n"
49703"///\n"
49704"/// This intrinsic corresponds to the \\c PSIGNB instruction.\n"
49705"///\n"
49706"/// \\param __a\n"
49707"/// A 64-bit integer vector containing the values to be copied.\n"
49708"/// \\param __b\n"
49709"/// A 64-bit integer vector containing control bytes corresponding to\n"
49710"/// positions in the destination.\n"
49711"/// \\returns A 64-bit integer vector containing the resultant values.\n"
49712"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49713"_mm_sign_pi8(__m64 __a, __m64 __b)\n"
49714"{\n"
49715" return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);\n"
49716"}\n"
49717"\n"
49718"/// For each 16-bit integer in the first source operand, perform one of\n"
49719"/// the following actions as specified by the second source operand.\n"
49720"///\n"
49721"/// If the word in the second source is negative, calculate the two's\n"
49722"/// complement of the corresponding word in the first source, and write that\n"
49723"/// value to the destination. If the word in the second source is positive,\n"
49724"/// copy the corresponding word from the first source to the destination. If\n"
49725"/// the word in the second source is zero, clear the corresponding word in\n"
49726"/// the destination.\n"
49727"///\n"
49728"/// \\headerfile <x86intrin.h>\n"
49729"///\n"
49730"/// This intrinsic corresponds to the \\c PSIGNW instruction.\n"
49731"///\n"
49732"/// \\param __a\n"
49733"/// A 64-bit integer vector containing the values to be copied.\n"
49734"/// \\param __b\n"
49735"/// A 64-bit integer vector containing control words corresponding to\n"
49736"/// positions in the destination.\n"
49737"/// \\returns A 64-bit integer vector containing the resultant values.\n"
49738"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49739"_mm_sign_pi16(__m64 __a, __m64 __b)\n"
49740"{\n"
49741" return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);\n"
49742"}\n"
49743"\n"
49744"/// For each 32-bit integer in the first source operand, perform one of\n"
49745"/// the following actions as specified by the second source operand.\n"
49746"///\n"
49747"/// If the doubleword in the second source is negative, calculate the two's\n"
49748"/// complement of the corresponding doubleword in the first source, and\n"
49749"/// write that value to the destination. If the doubleword in the second\n"
49750"/// source is positive, copy the corresponding doubleword from the first\n"
49751"/// source to the destination. If the doubleword in the second source is\n"
49752"/// zero, clear the corresponding doubleword in the destination.\n"
49753"///\n"
49754"/// \\headerfile <x86intrin.h>\n"
49755"///\n"
49756"/// This intrinsic corresponds to the \\c PSIGND instruction.\n"
49757"///\n"
49758"/// \\param __a\n"
49759"/// A 64-bit integer vector containing the values to be copied.\n"
49760"/// \\param __b\n"
49761"/// A 64-bit integer vector containing two control doublewords corresponding\n"
49762"/// to positions in the destination.\n"
49763"/// \\returns A 64-bit integer vector containing the resultant values.\n"
49764"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
49765"_mm_sign_pi32(__m64 __a, __m64 __b)\n"
49766"{\n"
49767" return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);\n"
49768"}\n"
49769"\n"
49770"#undef __DEFAULT_FN_ATTRS\n"
49771"#undef __DEFAULT_FN_ATTRS_MMX\n"
49772"\n"
49773"#endif /* __TMMINTRIN_H */\n"
49774"" } ,
49775 { "/builtins/unwind.h" , "/*===---- unwind.h - Stack unwinding ----------------------------------------===\n"
49776" *\n"
49777" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
49778" * of this software and associated documentation files (the \"Software\"), to deal\n"
49779" * in the Software without restriction, including without limitation the rights\n"
49780" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
49781" * copies of the Software, and to permit persons to whom the Software is\n"
49782" * furnished to do so, subject to the following conditions:\n"
49783" *\n"
49784" * The above copyright notice and this permission notice shall be included in\n"
49785" * all copies or substantial portions of the Software.\n"
49786" *\n"
49787" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
49788" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
49789" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
49790" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
49791" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
49792" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
49793" * THE SOFTWARE.\n"
49794" *\n"
49795" *===-----------------------------------------------------------------------===\n"
49796" */\n"
49797"\n"
49798"/* See \"Data Definitions for libgcc_s\" in the Linux Standard Base.*/\n"
49799"\n"
49800"#if defined(__APPLE__) && __has_include_next(<unwind.h>)\n"
49801"/* Darwin (from 11.x on) provide an unwind.h. If that's available,\n"
49802" * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,\n"
49803" * so define that around the include.*/\n"
49804"# ifndef _GNU_SOURCE\n"
49805"# define _SHOULD_UNDEFINE_GNU_SOURCE\n"
49806"# define _GNU_SOURCE\n"
49807"# endif\n"
49808"// libunwind's unwind.h reflects the current visibility. However, Mozilla\n"
49809"// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the\n"
49810"// visibility to default and export its contents. gcc also allows users to\n"
49811"// override its override by #defining HIDE_EXPORTS (but note, this only obeys\n"
49812"// the user's -fvisibility setting; it doesn't hide any exports on its own). We\n"
49813"// imitate gcc's header here:\n"
49814"# ifdef HIDE_EXPORTS\n"
49815"# include_next <unwind.h>\n"
49816"# else\n"
49817"# pragma GCC visibility push(default)\n"
49818"# include_next <unwind.h>\n"
49819"# pragma GCC visibility pop\n"
49820"# endif\n"
49821"# ifdef _SHOULD_UNDEFINE_GNU_SOURCE\n"
49822"# undef _GNU_SOURCE\n"
49823"# undef _SHOULD_UNDEFINE_GNU_SOURCE\n"
49824"# endif\n"
49825"#else\n"
49826"\n"
49827"#ifndef __CLANG_UNWIND_H\n"
49828"#define __CLANG_UNWIND_H\n"
49829"\n"
49830"#include <stdint.h>\n"
49831"\n"
49832"#ifdef __cplusplus\n"
49833"extern \"C\" {\n"
49834"#endif\n"
49835"\n"
49836"/* It is a bit strange for a header to play with the visibility of the\n"
49837" symbols it declares, but this matches gcc's behavior and some programs\n"
49838" depend on it */\n"
49839"#ifndef HIDE_EXPORTS\n"
49840"#pragma GCC visibility push(default)\n"
49841"#endif\n"
49842"\n"
49843"typedef uintptr_t _Unwind_Word;\n"
49844"typedef intptr_t _Unwind_Sword;\n"
49845"typedef uintptr_t _Unwind_Ptr;\n"
49846"typedef uintptr_t _Unwind_Internal_Ptr;\n"
49847"typedef uint64_t _Unwind_Exception_Class;\n"
49848"\n"
49849"typedef intptr_t _sleb128_t;\n"
49850"typedef uintptr_t _uleb128_t;\n"
49851"\n"
49852"struct _Unwind_Context;\n"
49853"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
49854"struct _Unwind_Control_Block;\n"
49855"typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */\n"
49856"#else\n"
49857"struct _Unwind_Exception;\n"
49858"typedef struct _Unwind_Exception _Unwind_Exception;\n"
49859"#endif\n"
49860"typedef enum {\n"
49861" _URC_NO_REASON = 0,\n"
49862"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
49863" !defined(__ARM_DWARF_EH__)\n"
49864" _URC_OK = 0, /* used by ARM EHABI */\n"
49865"#endif\n"
49866" _URC_FOREIGN_EXCEPTION_CAUGHT = 1,\n"
49867"\n"
49868" _URC_FATAL_PHASE2_ERROR = 2,\n"
49869" _URC_FATAL_PHASE1_ERROR = 3,\n"
49870" _URC_NORMAL_STOP = 4,\n"
49871"\n"
49872" _URC_END_OF_STACK = 5,\n"
49873" _URC_HANDLER_FOUND = 6,\n"
49874" _URC_INSTALL_CONTEXT = 7,\n"
49875" _URC_CONTINUE_UNWIND = 8,\n"
49876"#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n"
49877" !defined(__ARM_DWARF_EH__)\n"
49878" _URC_FAILURE = 9 /* used by ARM EHABI */\n"
49879"#endif\n"
49880"} _Unwind_Reason_Code;\n"
49881"\n"
49882"typedef enum {\n"
49883" _UA_SEARCH_PHASE = 1,\n"
49884" _UA_CLEANUP_PHASE = 2,\n"
49885"\n"
49886" _UA_HANDLER_FRAME = 4,\n"
49887" _UA_FORCE_UNWIND = 8,\n"
49888" _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */\n"
49889"} _Unwind_Action;\n"
49890"\n"
49891"typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,\n"
49892" _Unwind_Exception *);\n"
49893"\n"
49894"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
49895"typedef struct _Unwind_Control_Block _Unwind_Control_Block;\n"
49896"typedef uint32_t _Unwind_EHT_Header;\n"
49897"\n"
49898"struct _Unwind_Control_Block {\n"
49899" uint64_t exception_class;\n"
49900" void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);\n"
49901" /* unwinder cache (private fields for the unwinder's use) */\n"
49902" struct {\n"
49903" uint32_t reserved1; /* forced unwind stop function, 0 if not forced */\n"
49904" uint32_t reserved2; /* personality routine */\n"
49905" uint32_t reserved3; /* callsite */\n"
49906" uint32_t reserved4; /* forced unwind stop argument */\n"
49907" uint32_t reserved5;\n"
49908" } unwinder_cache;\n"
49909" /* propagation barrier cache (valid after phase 1) */\n"
49910" struct {\n"
49911" uint32_t sp;\n"
49912" uint32_t bitpattern[5];\n"
49913" } barrier_cache;\n"
49914" /* cleanup cache (preserved over cleanup) */\n"
49915" struct {\n"
49916" uint32_t bitpattern[4];\n"
49917" } cleanup_cache;\n"
49918" /* personality cache (for personality's benefit) */\n"
49919" struct {\n"
49920" uint32_t fnstart; /* function start address */\n"
49921" _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */\n"
49922" uint32_t additional; /* additional data */\n"
49923" uint32_t reserved1;\n"
49924" } pr_cache;\n"
49925" long long int : 0; /* force alignment of next item to 8-byte boundary */\n"
49926"} __attribute__((__aligned__(8)));\n"
49927"#else\n"
49928"struct _Unwind_Exception {\n"
49929" _Unwind_Exception_Class exception_class;\n"
49930" _Unwind_Exception_Cleanup_Fn exception_cleanup;\n"
49931"#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)\n"
49932" _Unwind_Word private_[6];\n"
49933"#else\n"
49934" _Unwind_Word private_1;\n"
49935" _Unwind_Word private_2;\n"
49936"#endif\n"
49937" /* The Itanium ABI requires that _Unwind_Exception objects are \"double-word\n"
49938" * aligned\". GCC has interpreted this to mean \"use the maximum useful\n"
49939" * alignment for the target\"; so do we. */\n"
49940"} __attribute__((__aligned__));\n"
49941"#endif\n"
49942"\n"
49943"typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,\n"
49944" _Unwind_Exception_Class,\n"
49945" _Unwind_Exception *,\n"
49946" struct _Unwind_Context *,\n"
49947" void *);\n"
49948"\n"
49949"typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,\n"
49950" _Unwind_Exception_Class,\n"
49951" _Unwind_Exception *,\n"
49952" struct _Unwind_Context *);\n"
49953"typedef _Unwind_Personality_Fn __personality_routine;\n"
49954"\n"
49955"typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,\n"
49956" void *);\n"
49957"\n"
49958"#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n"
49959"typedef enum {\n"
49960" _UVRSC_CORE = 0, /* integer register */\n"
49961" _UVRSC_VFP = 1, /* vfp */\n"
49962" _UVRSC_WMMXD = 3, /* Intel WMMX data register */\n"
49963" _UVRSC_WMMXC = 4 /* Intel WMMX control register */\n"
49964"} _Unwind_VRS_RegClass;\n"
49965"\n"
49966"typedef enum {\n"
49967" _UVRSD_UINT32 = 0,\n"
49968" _UVRSD_VFPX = 1,\n"
49969" _UVRSD_UINT64 = 3,\n"
49970" _UVRSD_FLOAT = 4,\n"
49971" _UVRSD_DOUBLE = 5\n"
49972"} _Unwind_VRS_DataRepresentation;\n"
49973"\n"
49974"typedef enum {\n"
49975" _UVRSR_OK = 0,\n"
49976" _UVRSR_NOT_IMPLEMENTED = 1,\n"
49977" _UVRSR_FAILED = 2\n"
49978"} _Unwind_VRS_Result;\n"
49979"\n"
49980"typedef uint32_t _Unwind_State;\n"
49981"#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)\n"
49982"#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)\n"
49983"#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)\n"
49984"#define _US_ACTION_MASK ((_Unwind_State)3)\n"
49985"#define _US_FORCE_UNWIND ((_Unwind_State)8)\n"
49986"\n"
49987"_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,\n"
49988" _Unwind_VRS_RegClass __regclass,\n"
49989" uint32_t __regno,\n"
49990" _Unwind_VRS_DataRepresentation __representation,\n"
49991" void *__valuep);\n"
49992"\n"
49993"_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,\n"
49994" _Unwind_VRS_RegClass __regclass,\n"
49995" uint32_t __regno,\n"
49996" _Unwind_VRS_DataRepresentation __representation,\n"
49997" void *__valuep);\n"
49998"\n"
49999"static __inline__\n"
50000"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {\n"
50001" _Unwind_Word __value;\n"
50002" _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
50003" return __value;\n"
50004"}\n"
50005"\n"
50006"static __inline__\n"
50007"void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,\n"
50008" _Unwind_Word __value) {\n"
50009" _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n"
50010"}\n"
50011"\n"
50012"static __inline__\n"
50013"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {\n"
50014" _Unwind_Word __ip = _Unwind_GetGR(__context, 15);\n"
50015" return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */\n"
50016"}\n"
50017"\n"
50018"static __inline__\n"
50019"void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {\n"
50020" _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;\n"
50021" _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);\n"
50022"}\n"
50023"#else\n"
50024"_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);\n"
50025"void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);\n"
50026"\n"
50027"_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);\n"
50028"void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);\n"
50029"#endif\n"
50030"\n"
50031"\n"
50032"_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);\n"
50033"\n"
50034"_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);\n"
50035"\n"
50036"_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);\n"
50037"\n"
50038"void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);\n"
50039"\n"
50040"_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);\n"
50041"\n"
50042"/* DWARF EH functions; currently not available on Darwin/ARM */\n"
50043"#if !defined(__APPLE__) || !defined(__arm__)\n"
50044"_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);\n"
50045"_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,\n"
50046" void *);\n"
50047"void _Unwind_DeleteException(_Unwind_Exception *);\n"
50048"void _Unwind_Resume(_Unwind_Exception *);\n"
50049"_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);\n"
50050"\n"
50051"#endif\n"
50052"\n"
50053"_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);\n"
50054"\n"
50055"/* setjmp(3)/longjmp(3) stuff */\n"
50056"typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;\n"
50057"\n"
50058"void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);\n"
50059"void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);\n"
50060"_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);\n"
50061"_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,\n"
50062" _Unwind_Stop_Fn, void *);\n"
50063"void _Unwind_SjLj_Resume(_Unwind_Exception *);\n"
50064"_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);\n"
50065"\n"
50066"void *_Unwind_FindEnclosingFunction(void *);\n"
50067"\n"
50068"#ifdef __APPLE__\n"
50069"\n"
50070"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)\n"
50071" __attribute__((__unavailable__));\n"
50072"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)\n"
50073" __attribute__((__unavailable__));\n"
50074"\n"
50075"/* Darwin-specific functions */\n"
50076"void __register_frame(const void *);\n"
50077"void __deregister_frame(const void *);\n"
50078"\n"
50079"struct dwarf_eh_bases {\n"
50080" uintptr_t tbase;\n"
50081" uintptr_t dbase;\n"
50082" uintptr_t func;\n"
50083"};\n"
50084"void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);\n"
50085"\n"
50086"void __register_frame_info_bases(const void *, void *, void *, void *)\n"
50087" __attribute__((__unavailable__));\n"
50088"void __register_frame_info(const void *, void *) __attribute__((__unavailable__));\n"
50089"void __register_frame_info_table_bases(const void *, void*, void *, void *)\n"
50090" __attribute__((__unavailable__));\n"
50091"void __register_frame_info_table(const void *, void *)\n"
50092" __attribute__((__unavailable__));\n"
50093"void __register_frame_table(const void *) __attribute__((__unavailable__));\n"
50094"void __deregister_frame_info(const void *) __attribute__((__unavailable__));\n"
50095"void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));\n"
50096"\n"
50097"#else\n"
50098"\n"
50099"_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);\n"
50100"_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);\n"
50101"\n"
50102"#endif\n"
50103"\n"
50104"\n"
50105"#ifndef HIDE_EXPORTS\n"
50106"#pragma GCC visibility pop\n"
50107"#endif\n"
50108"\n"
50109"#ifdef __cplusplus\n"
50110"}\n"
50111"#endif\n"
50112"\n"
50113"#endif /* __CLANG_UNWIND_H */\n"
50114"\n"
50115"#endif\n"
50116"\n"
50117"" } ,
50118 { "/builtins/vadefs.h" , "/* ===-------- vadefs.h ---------------------------------------------------===\n"
50119" *\n"
50120" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50121" * of this software and associated documentation files (the \"Software\"), to deal\n"
50122" * in the Software without restriction, including without limitation the rights\n"
50123" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50124" * copies of the Software, and to permit persons to whom the Software is\n"
50125" * furnished to do so, subject to the following conditions:\n"
50126" *\n"
50127" * The above copyright notice and this permission notice shall be included in\n"
50128" * all copies or substantial portions of the Software.\n"
50129" *\n"
50130" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50131" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50132" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50133" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50134" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50135" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50136" * THE SOFTWARE.\n"
50137" *\n"
50138" *===-----------------------------------------------------------------------===\n"
50139" */\n"
50140"\n"
50141"/* Only include this if we are aiming for MSVC compatibility. */\n"
50142"#ifndef _MSC_VER\n"
50143"#include_next <vadefs.h>\n"
50144"#else\n"
50145"\n"
50146"#ifndef __clang_vadefs_h\n"
50147"#define __clang_vadefs_h\n"
50148"\n"
50149"#include_next <vadefs.h>\n"
50150"\n"
50151"/* Override macros from vadefs.h with definitions that work with Clang. */\n"
50152"#ifdef _crt_va_start\n"
50153"#undef _crt_va_start\n"
50154"#define _crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
50155"#endif\n"
50156"#ifdef _crt_va_end\n"
50157"#undef _crt_va_end\n"
50158"#define _crt_va_end(ap) __builtin_va_end(ap)\n"
50159"#endif\n"
50160"#ifdef _crt_va_arg\n"
50161"#undef _crt_va_arg\n"
50162"#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
50163"#endif\n"
50164"\n"
50165"/* VS 2015 switched to double underscore names, which is an improvement, but now\n"
50166" * we have to intercept those names too.\n"
50167" */\n"
50168"#ifdef __crt_va_start\n"
50169"#undef __crt_va_start\n"
50170"#define __crt_va_start(ap, param) __builtin_va_start(ap, param)\n"
50171"#endif\n"
50172"#ifdef __crt_va_end\n"
50173"#undef __crt_va_end\n"
50174"#define __crt_va_end(ap) __builtin_va_end(ap)\n"
50175"#endif\n"
50176"#ifdef __crt_va_arg\n"
50177"#undef __crt_va_arg\n"
50178"#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n"
50179"#endif\n"
50180"\n"
50181"#endif\n"
50182"#endif\n"
50183"" } ,
50184 { "/builtins/vaesintrin.h" , "/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===\n"
50185" *\n"
50186" *\n"
50187" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50188" * of this software and associated documentation files (the \"Software\"), to deal\n"
50189" * in the Software without restriction, including without limitation the rights\n"
50190" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50191" * copies of the Software, and to permit persons to whom the Software is\n"
50192" * furnished to do so, subject to the following conditions:\n"
50193" *\n"
50194" * The above copyright notice and this permission notice shall be included in\n"
50195" * all copies or substantial portions of the Software.\n"
50196" *\n"
50197" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50198" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50199" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50200" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50201" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50202" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50203" * THE SOFTWARE.\n"
50204" *\n"
50205" *===-----------------------------------------------------------------------===\n"
50206" */\n"
50207"#ifndef __IMMINTRIN_H\n"
50208"#error \"Never use <vaesintrin.h> directly; include <immintrin.h> instead.\"\n"
50209"#endif\n"
50210"\n"
50211"#ifndef __VAESINTRIN_H\n"
50212"#define __VAESINTRIN_H\n"
50213"\n"
50214"/* Default attributes for YMM forms. */\n"
50215"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"vaes\"), __min_vector_width__(256)))\n"
50216"\n"
50217"/* Default attributes for ZMM forms. */\n"
50218"#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__(\"avx512f,vaes\"), __min_vector_width__(512)))\n"
50219"\n"
50220"\n"
50221"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
50222" _mm256_aesenc_epi128(__m256i __A, __m256i __B)\n"
50223"{\n"
50224" return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,\n"
50225" (__v4di) __B);\n"
50226"}\n"
50227"\n"
50228"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
50229" _mm512_aesenc_epi128(__m512i __A, __m512i __B)\n"
50230"{\n"
50231" return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,\n"
50232" (__v8di) __B);\n"
50233"}\n"
50234"\n"
50235"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
50236" _mm256_aesdec_epi128(__m256i __A, __m256i __B)\n"
50237"{\n"
50238" return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,\n"
50239" (__v4di) __B);\n"
50240"}\n"
50241"\n"
50242"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
50243" _mm512_aesdec_epi128(__m512i __A, __m512i __B)\n"
50244"{\n"
50245" return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,\n"
50246" (__v8di) __B);\n"
50247"}\n"
50248"\n"
50249"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
50250" _mm256_aesenclast_epi128(__m256i __A, __m256i __B)\n"
50251"{\n"
50252" return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,\n"
50253" (__v4di) __B);\n"
50254"}\n"
50255"\n"
50256"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
50257" _mm512_aesenclast_epi128(__m512i __A, __m512i __B)\n"
50258"{\n"
50259" return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,\n"
50260" (__v8di) __B);\n"
50261"}\n"
50262"\n"
50263"static __inline__ __m256i __DEFAULT_FN_ATTRS\n"
50264" _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)\n"
50265"{\n"
50266" return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,\n"
50267" (__v4di) __B);\n"
50268"}\n"
50269"\n"
50270"static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n"
50271" _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)\n"
50272"{\n"
50273" return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,\n"
50274" (__v8di) __B);\n"
50275"}\n"
50276"\n"
50277"\n"
50278"#undef __DEFAULT_FN_ATTRS\n"
50279"#undef __DEFAULT_FN_ATTRS_F\n"
50280"\n"
50281"#endif\n"
50282"" } ,
50283 { "/builtins/varargs.h" , "/*===---- varargs.h - Variable argument handling -------------------------------------===\n"
50284"*\n"
50285"* Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50286"* of this software and associated documentation files (the \"Software\"), to deal\n"
50287"* in the Software without restriction, including without limitation the rights\n"
50288"* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50289"* copies of the Software, and to permit persons to whom the Software is\n"
50290"* furnished to do so, subject to the following conditions:\n"
50291"*\n"
50292"* The above copyright notice and this permission notice shall be included in\n"
50293"* all copies or substantial portions of the Software.\n"
50294"*\n"
50295"* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50296"* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50297"* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50298"* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50299"* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50300"* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50301"* THE SOFTWARE.\n"
50302"*\n"
50303"*===-----------------------------------------------------------------------===\n"
50304"*/\n"
50305"#ifndef __VARARGS_H\n"
50306"#define __VARARGS_H\n"
50307" #error \"Please use <stdarg.h> instead of <varargs.h>\"\n"
50308"#endif\n"
50309"" } ,
50310 { "/builtins/vpclmulqdqintrin.h" , "/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===\n"
50311" *\n"
50312" *\n"
50313" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50314" * of this software and associated documentation files (the \"Software\"), to deal\n"
50315" * in the Software without restriction, including without limitation the rights\n"
50316" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50317" * copies of the Software, and to permit persons to whom the Software is\n"
50318" * furnished to do so, subject to the following conditions:\n"
50319" *\n"
50320" * The above copyright notice and this permission notice shall be included in\n"
50321" * all copies or substantial portions of the Software.\n"
50322" *\n"
50323" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50324" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50325" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50326" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50327" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50328" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50329" * THE SOFTWARE.\n"
50330" *\n"
50331" *===-----------------------------------------------------------------------===\n"
50332" */\n"
50333"#ifndef __IMMINTRIN_H\n"
50334"#error \"Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead.\"\n"
50335"#endif\n"
50336"\n"
50337"#ifndef __VPCLMULQDQINTRIN_H\n"
50338"#define __VPCLMULQDQINTRIN_H\n"
50339"\n"
50340"#define _mm256_clmulepi64_epi128(A, B, I) \\\n"
50341" (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \\\n"
50342" (__v4di)(__m256i)(B), \\\n"
50343" (char)(I))\n"
50344"\n"
50345"#define _mm512_clmulepi64_epi128(A, B, I) \\\n"
50346" (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \\\n"
50347" (__v8di)(__m512i)(B), \\\n"
50348" (char)(I))\n"
50349"\n"
50350"#endif /* __VPCLMULQDQINTRIN_H */\n"
50351"\n"
50352"" } ,
50353 { "/builtins/waitpkgintrin.h" , "/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===\n"
50354" *\n"
50355" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50356" * of this software and associated documentation files (the \"Software\"), to deal\n"
50357" * in the Software without restriction, including without limitation the rights\n"
50358" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50359" * copies of the Software, and to permit persons to whom the Software is\n"
50360" * furnished to do so, subject to the following conditions:\n"
50361" *\n"
50362" * The above copyright notice and this permission notice shall be included in\n"
50363" * all copies or substantial portions of the Software.\n"
50364" *\n"
50365" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50366" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50367" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50368" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50369" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50370" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50371" * THE SOFTWARE.\n"
50372" *\n"
50373" *===-----------------------------------------------------------------------===\n"
50374" */\n"
50375"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
50376"#error \"Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead.\"\n"
50377"#endif\n"
50378"\n"
50379"#ifndef __WAITPKGINTRIN_H\n"
50380"#define __WAITPKGINTRIN_H\n"
50381"\n"
50382"/* Define the default attributes for the functions in this file. */\n"
50383"#define __DEFAULT_FN_ATTRS \\\n"
50384" __attribute__((__always_inline__, __nodebug__, __target__(\"waitpkg\")))\n"
50385"\n"
50386"static __inline__ void __DEFAULT_FN_ATTRS\n"
50387"_umonitor (void * __address)\n"
50388"{\n"
50389" __builtin_ia32_umonitor (__address);\n"
50390"}\n"
50391"\n"
50392"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
50393"_umwait (unsigned int __control, unsigned long long __counter)\n"
50394"{\n"
50395" return __builtin_ia32_umwait (__control,\n"
50396" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
50397"}\n"
50398"\n"
50399"static __inline__ unsigned char __DEFAULT_FN_ATTRS\n"
50400"_tpause (unsigned int __control, unsigned long long __counter)\n"
50401"{\n"
50402" return __builtin_ia32_tpause (__control,\n"
50403" (unsigned int)(__counter >> 32), (unsigned int)__counter);\n"
50404"}\n"
50405"\n"
50406"#undef __DEFAULT_FN_ATTRS\n"
50407"\n"
50408"#endif /* __WAITPKGINTRIN_H */\n"
50409"" } ,
50410 { "/builtins/wbnoinvdintrin.h" , "/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===\n"
50411" *\n"
50412" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50413" * of this software and associated documentation files (the \"Software\"), to deal\n"
50414" * in the Software without restriction, including without limitation the rights\n"
50415" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50416" * copies of the Software, and to permit persons to whom the Software is\n"
50417" * furnished to do so, subject to the following conditions:\n"
50418" *\n"
50419" * The above copyright notice and this permission notice shall be included in\n"
50420" * all copies or substantial portions of the Software.\n"
50421" *\n"
50422" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50423" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50424" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50425" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50426" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50427" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50428" * THE SOFTWARE.\n"
50429" *\n"
50430" *===-----------------------------------------------------------------------===\n"
50431" */\n"
50432"\n"
50433"#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n"
50434"#error \"Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead.\"\n"
50435"#endif\n"
50436"\n"
50437"#ifndef __WBNOINVDINTRIN_H\n"
50438"#define __WBNOINVDINTRIN_H\n"
50439"\n"
50440"static __inline__ void\n"
50441" __attribute__((__always_inline__, __nodebug__, __target__(\"wbnoinvd\")))\n"
50442"_wbnoinvd (void)\n"
50443"{\n"
50444" __builtin_ia32_wbnoinvd ();\n"
50445"}\n"
50446"\n"
50447"#endif /* __WBNOINVDINTRIN_H */\n"
50448"" } ,
50449 { "/builtins/wmmintrin.h" , "/*===---- wmmintrin.h - AES intrinsics ------------------------------------===\n"
50450" *\n"
50451" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50452" * of this software and associated documentation files (the \"Software\"), to deal\n"
50453" * in the Software without restriction, including without limitation the rights\n"
50454" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50455" * copies of the Software, and to permit persons to whom the Software is\n"
50456" * furnished to do so, subject to the following conditions:\n"
50457" *\n"
50458" * The above copyright notice and this permission notice shall be included in\n"
50459" * all copies or substantial portions of the Software.\n"
50460" *\n"
50461" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50462" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50463" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50464" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50465" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50466" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50467" * THE SOFTWARE.\n"
50468" *\n"
50469" *===-----------------------------------------------------------------------===\n"
50470" */\n"
50471"\n"
50472"#ifndef __WMMINTRIN_H\n"
50473"#define __WMMINTRIN_H\n"
50474"\n"
50475"#include <emmintrin.h>\n"
50476"\n"
50477"#include <__wmmintrin_aes.h>\n"
50478"\n"
50479"#include <__wmmintrin_pclmul.h>\n"
50480"\n"
50481"#endif /* __WMMINTRIN_H */\n"
50482"" } ,
50483 { "/builtins/x86intrin.h" , "/*===---- x86intrin.h - X86 intrinsics -------------------------------------===\n"
50484" *\n"
50485" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50486" * of this software and associated documentation files (the \"Software\"), to deal\n"
50487" * in the Software without restriction, including without limitation the rights\n"
50488" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50489" * copies of the Software, and to permit persons to whom the Software is\n"
50490" * furnished to do so, subject to the following conditions:\n"
50491" *\n"
50492" * The above copyright notice and this permission notice shall be included in\n"
50493" * all copies or substantial portions of the Software.\n"
50494" *\n"
50495" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50496" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50497" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50498" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50499" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50500" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50501" * THE SOFTWARE.\n"
50502" *\n"
50503" *===-----------------------------------------------------------------------===\n"
50504" */\n"
50505"\n"
50506"#ifndef __X86INTRIN_H\n"
50507"#define __X86INTRIN_H\n"
50508"\n"
50509"#include <ia32intrin.h>\n"
50510"\n"
50511"#include <immintrin.h>\n"
50512"\n"
50513"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)\n"
50514"#include <mm3dnow.h>\n"
50515"#endif\n"
50516"\n"
50517"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)\n"
50518"#include <prfchwintrin.h>\n"
50519"#endif\n"
50520"\n"
50521"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)\n"
50522"#include <ammintrin.h>\n"
50523"#endif\n"
50524"\n"
50525"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)\n"
50526"#include <fma4intrin.h>\n"
50527"#endif\n"
50528"\n"
50529"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)\n"
50530"#include <xopintrin.h>\n"
50531"#endif\n"
50532"\n"
50533"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)\n"
50534"#include <tbmintrin.h>\n"
50535"#endif\n"
50536"\n"
50537"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)\n"
50538"#include <lwpintrin.h>\n"
50539"#endif\n"
50540"\n"
50541"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)\n"
50542"#include <mwaitxintrin.h>\n"
50543"#endif\n"
50544"\n"
50545"#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)\n"
50546"#include <clzerointrin.h>\n"
50547"#endif\n"
50548"\n"
50549"\n"
50550"#endif /* __X86INTRIN_H */\n"
50551"" } ,
50552 { "/builtins/xmmintrin.h" , "/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===\n"
50553" *\n"
50554" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
50555" * of this software and associated documentation files (the \"Software\"), to deal\n"
50556" * in the Software without restriction, including without limitation the rights\n"
50557" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
50558" * copies of the Software, and to permit persons to whom the Software is\n"
50559" * furnished to do so, subject to the following conditions:\n"
50560" *\n"
50561" * The above copyright notice and this permission notice shall be included in\n"
50562" * all copies or substantial portions of the Software.\n"
50563" *\n"
50564" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
50565" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
50566" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
50567" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
50568" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
50569" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
50570" * THE SOFTWARE.\n"
50571" *\n"
50572" *===-----------------------------------------------------------------------===\n"
50573" */\n"
50574"\n"
50575"#ifndef __XMMINTRIN_H\n"
50576"#define __XMMINTRIN_H\n"
50577"\n"
50578"#include <mmintrin.h>\n"
50579"\n"
50580"typedef int __v4si __attribute__((__vector_size__(16)));\n"
50581"typedef float __v4sf __attribute__((__vector_size__(16)));\n"
50582"typedef float __m128 __attribute__((__vector_size__(16)));\n"
50583"\n"
50584"/* Unsigned types */\n"
50585"typedef unsigned int __v4su __attribute__((__vector_size__(16)));\n"
50586"\n"
50587"/* This header should only be included in a hosted environment as it depends on\n"
50588" * a standard library to provide allocation routines. */\n"
50589"#if __STDC_HOSTED__\n"
50590"#include <mm_malloc.h>\n"
50591"#endif\n"
50592"\n"
50593"/* Define the default attributes for the functions in this file. */\n"
50594"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse\"), __min_vector_width__(128)))\n"
50595"#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse\"), __min_vector_width__(64)))\n"
50596"\n"
50597"/// Adds the 32-bit float values in the low-order bits of the operands.\n"
50598"///\n"
50599"/// \\headerfile <x86intrin.h>\n"
50600"///\n"
50601"/// This intrinsic corresponds to the <c> VADDSS / ADDSS </c> instructions.\n"
50602"///\n"
50603"/// \\param __a\n"
50604"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50605"/// The lower 32 bits of this operand are used in the calculation.\n"
50606"/// \\param __b\n"
50607"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50608"/// The lower 32 bits of this operand are used in the calculation.\n"
50609"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum\n"
50610"/// of the lower 32 bits of both operands. The upper 96 bits are copied from\n"
50611"/// the upper 96 bits of the first source operand.\n"
50612"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50613"_mm_add_ss(__m128 __a, __m128 __b)\n"
50614"{\n"
50615" __a[0] += __b[0];\n"
50616" return __a;\n"
50617"}\n"
50618"\n"
50619"/// Adds two 128-bit vectors of [4 x float], and returns the results of\n"
50620"/// the addition.\n"
50621"///\n"
50622"/// \\headerfile <x86intrin.h>\n"
50623"///\n"
50624"/// This intrinsic corresponds to the <c> VADDPS / ADDPS </c> instructions.\n"
50625"///\n"
50626"/// \\param __a\n"
50627"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50628"/// \\param __b\n"
50629"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50630"/// \\returns A 128-bit vector of [4 x float] containing the sums of both\n"
50631"/// operands.\n"
50632"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50633"_mm_add_ps(__m128 __a, __m128 __b)\n"
50634"{\n"
50635" return (__m128)((__v4sf)__a + (__v4sf)__b);\n"
50636"}\n"
50637"\n"
50638"/// Subtracts the 32-bit float value in the low-order bits of the second\n"
50639"/// operand from the corresponding value in the first operand.\n"
50640"///\n"
50641"/// \\headerfile <x86intrin.h>\n"
50642"///\n"
50643"/// This intrinsic corresponds to the <c> VSUBSS / SUBSS </c> instructions.\n"
50644"///\n"
50645"/// \\param __a\n"
50646"/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits\n"
50647"/// of this operand are used in the calculation.\n"
50648"/// \\param __b\n"
50649"/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32\n"
50650"/// bits of this operand are used in the calculation.\n"
50651"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
50652"/// difference of the lower 32 bits of both operands. The upper 96 bits are\n"
50653"/// copied from the upper 96 bits of the first source operand.\n"
50654"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50655"_mm_sub_ss(__m128 __a, __m128 __b)\n"
50656"{\n"
50657" __a[0] -= __b[0];\n"
50658" return __a;\n"
50659"}\n"
50660"\n"
50661"/// Subtracts each of the values of the second operand from the first\n"
50662"/// operand, both of which are 128-bit vectors of [4 x float] and returns\n"
50663"/// the results of the subtraction.\n"
50664"///\n"
50665"/// \\headerfile <x86intrin.h>\n"
50666"///\n"
50667"/// This intrinsic corresponds to the <c> VSUBPS / SUBPS </c> instructions.\n"
50668"///\n"
50669"/// \\param __a\n"
50670"/// A 128-bit vector of [4 x float] containing the minuend.\n"
50671"/// \\param __b\n"
50672"/// A 128-bit vector of [4 x float] containing the subtrahend.\n"
50673"/// \\returns A 128-bit vector of [4 x float] containing the differences between\n"
50674"/// both operands.\n"
50675"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50676"_mm_sub_ps(__m128 __a, __m128 __b)\n"
50677"{\n"
50678" return (__m128)((__v4sf)__a - (__v4sf)__b);\n"
50679"}\n"
50680"\n"
50681"/// Multiplies two 32-bit float values in the low-order bits of the\n"
50682"/// operands.\n"
50683"///\n"
50684"/// \\headerfile <x86intrin.h>\n"
50685"///\n"
50686"/// This intrinsic corresponds to the <c> VMULSS / MULSS </c> instructions.\n"
50687"///\n"
50688"/// \\param __a\n"
50689"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50690"/// The lower 32 bits of this operand are used in the calculation.\n"
50691"/// \\param __b\n"
50692"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50693"/// The lower 32 bits of this operand are used in the calculation.\n"
50694"/// \\returns A 128-bit vector of [4 x float] containing the product of the lower\n"
50695"/// 32 bits of both operands. The upper 96 bits are copied from the upper 96\n"
50696"/// bits of the first source operand.\n"
50697"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50698"_mm_mul_ss(__m128 __a, __m128 __b)\n"
50699"{\n"
50700" __a[0] *= __b[0];\n"
50701" return __a;\n"
50702"}\n"
50703"\n"
50704"/// Multiplies two 128-bit vectors of [4 x float] and returns the\n"
50705"/// results of the multiplication.\n"
50706"///\n"
50707"/// \\headerfile <x86intrin.h>\n"
50708"///\n"
50709"/// This intrinsic corresponds to the <c> VMULPS / MULPS </c> instructions.\n"
50710"///\n"
50711"/// \\param __a\n"
50712"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50713"/// \\param __b\n"
50714"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
50715"/// \\returns A 128-bit vector of [4 x float] containing the products of both\n"
50716"/// operands.\n"
50717"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50718"_mm_mul_ps(__m128 __a, __m128 __b)\n"
50719"{\n"
50720" return (__m128)((__v4sf)__a * (__v4sf)__b);\n"
50721"}\n"
50722"\n"
50723"/// Divides the value in the low-order 32 bits of the first operand by\n"
50724"/// the corresponding value in the second operand.\n"
50725"///\n"
50726"/// \\headerfile <x86intrin.h>\n"
50727"///\n"
50728"/// This intrinsic corresponds to the <c> VDIVSS / DIVSS </c> instructions.\n"
50729"///\n"
50730"/// \\param __a\n"
50731"/// A 128-bit vector of [4 x float] containing the dividend. The lower 32\n"
50732"/// bits of this operand are used in the calculation.\n"
50733"/// \\param __b\n"
50734"/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits\n"
50735"/// of this operand are used in the calculation.\n"
50736"/// \\returns A 128-bit vector of [4 x float] containing the quotients of the\n"
50737"/// lower 32 bits of both operands. The upper 96 bits are copied from the\n"
50738"/// upper 96 bits of the first source operand.\n"
50739"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50740"_mm_div_ss(__m128 __a, __m128 __b)\n"
50741"{\n"
50742" __a[0] /= __b[0];\n"
50743" return __a;\n"
50744"}\n"
50745"\n"
50746"/// Divides two 128-bit vectors of [4 x float].\n"
50747"///\n"
50748"/// \\headerfile <x86intrin.h>\n"
50749"///\n"
50750"/// This intrinsic corresponds to the <c> VDIVPS / DIVPS </c> instructions.\n"
50751"///\n"
50752"/// \\param __a\n"
50753"/// A 128-bit vector of [4 x float] containing the dividend.\n"
50754"/// \\param __b\n"
50755"/// A 128-bit vector of [4 x float] containing the divisor.\n"
50756"/// \\returns A 128-bit vector of [4 x float] containing the quotients of both\n"
50757"/// operands.\n"
50758"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50759"_mm_div_ps(__m128 __a, __m128 __b)\n"
50760"{\n"
50761" return (__m128)((__v4sf)__a / (__v4sf)__b);\n"
50762"}\n"
50763"\n"
50764"/// Calculates the square root of the value stored in the low-order bits\n"
50765"/// of a 128-bit vector of [4 x float].\n"
50766"///\n"
50767"/// \\headerfile <x86intrin.h>\n"
50768"///\n"
50769"/// This intrinsic corresponds to the <c> VSQRTSS / SQRTSS </c> instructions.\n"
50770"///\n"
50771"/// \\param __a\n"
50772"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
50773"/// used in the calculation.\n"
50774"/// \\returns A 128-bit vector of [4 x float] containing the square root of the\n"
50775"/// value in the low-order bits of the operand.\n"
50776"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50777"_mm_sqrt_ss(__m128 __a)\n"
50778"{\n"
50779" return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);\n"
50780"}\n"
50781"\n"
50782"/// Calculates the square roots of the values stored in a 128-bit vector\n"
50783"/// of [4 x float].\n"
50784"///\n"
50785"/// \\headerfile <x86intrin.h>\n"
50786"///\n"
50787"/// This intrinsic corresponds to the <c> VSQRTPS / SQRTPS </c> instructions.\n"
50788"///\n"
50789"/// \\param __a\n"
50790"/// A 128-bit vector of [4 x float].\n"
50791"/// \\returns A 128-bit vector of [4 x float] containing the square roots of the\n"
50792"/// values in the operand.\n"
50793"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50794"_mm_sqrt_ps(__m128 __a)\n"
50795"{\n"
50796" return __builtin_ia32_sqrtps((__v4sf)__a);\n"
50797"}\n"
50798"\n"
50799"/// Calculates the approximate reciprocal of the value stored in the\n"
50800"/// low-order bits of a 128-bit vector of [4 x float].\n"
50801"///\n"
50802"/// \\headerfile <x86intrin.h>\n"
50803"///\n"
50804"/// This intrinsic corresponds to the <c> VRCPSS / RCPSS </c> instructions.\n"
50805"///\n"
50806"/// \\param __a\n"
50807"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
50808"/// used in the calculation.\n"
50809"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
50810"/// reciprocal of the value in the low-order bits of the operand.\n"
50811"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50812"_mm_rcp_ss(__m128 __a)\n"
50813"{\n"
50814" return (__m128)__builtin_ia32_rcpss((__v4sf)__a);\n"
50815"}\n"
50816"\n"
50817"/// Calculates the approximate reciprocals of the values stored in a\n"
50818"/// 128-bit vector of [4 x float].\n"
50819"///\n"
50820"/// \\headerfile <x86intrin.h>\n"
50821"///\n"
50822"/// This intrinsic corresponds to the <c> VRCPPS / RCPPS </c> instructions.\n"
50823"///\n"
50824"/// \\param __a\n"
50825"/// A 128-bit vector of [4 x float].\n"
50826"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
50827"/// reciprocals of the values in the operand.\n"
50828"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50829"_mm_rcp_ps(__m128 __a)\n"
50830"{\n"
50831" return (__m128)__builtin_ia32_rcpps((__v4sf)__a);\n"
50832"}\n"
50833"\n"
50834"/// Calculates the approximate reciprocal of the square root of the value\n"
50835"/// stored in the low-order bits of a 128-bit vector of [4 x float].\n"
50836"///\n"
50837"/// \\headerfile <x86intrin.h>\n"
50838"///\n"
50839"/// This intrinsic corresponds to the <c> VRSQRTSS / RSQRTSS </c> instructions.\n"
50840"///\n"
50841"/// \\param __a\n"
50842"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
50843"/// used in the calculation.\n"
50844"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
50845"/// reciprocal of the square root of the value in the low-order bits of the\n"
50846"/// operand.\n"
50847"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50848"_mm_rsqrt_ss(__m128 __a)\n"
50849"{\n"
50850" return __builtin_ia32_rsqrtss((__v4sf)__a);\n"
50851"}\n"
50852"\n"
50853"/// Calculates the approximate reciprocals of the square roots of the\n"
50854"/// values stored in a 128-bit vector of [4 x float].\n"
50855"///\n"
50856"/// \\headerfile <x86intrin.h>\n"
50857"///\n"
50858"/// This intrinsic corresponds to the <c> VRSQRTPS / RSQRTPS </c> instructions.\n"
50859"///\n"
50860"/// \\param __a\n"
50861"/// A 128-bit vector of [4 x float].\n"
50862"/// \\returns A 128-bit vector of [4 x float] containing the approximate\n"
50863"/// reciprocals of the square roots of the values in the operand.\n"
50864"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50865"_mm_rsqrt_ps(__m128 __a)\n"
50866"{\n"
50867" return __builtin_ia32_rsqrtps((__v4sf)__a);\n"
50868"}\n"
50869"\n"
50870"/// Compares two 32-bit float values in the low-order bits of both\n"
50871"/// operands and returns the lesser value in the low-order bits of the\n"
50872"/// vector of [4 x float].\n"
50873"///\n"
50874"/// \\headerfile <x86intrin.h>\n"
50875"///\n"
50876"/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.\n"
50877"///\n"
50878"/// \\param __a\n"
50879"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
50880"/// 32 bits of this operand are used in the comparison.\n"
50881"/// \\param __b\n"
50882"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
50883"/// 32 bits of this operand are used in the comparison.\n"
50884"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
50885"/// minimum value between both operands. The upper 96 bits are copied from\n"
50886"/// the upper 96 bits of the first source operand.\n"
50887"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50888"_mm_min_ss(__m128 __a, __m128 __b)\n"
50889"{\n"
50890" return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);\n"
50891"}\n"
50892"\n"
50893"/// Compares two 128-bit vectors of [4 x float] and returns the lesser\n"
50894"/// of each pair of values.\n"
50895"///\n"
50896"/// \\headerfile <x86intrin.h>\n"
50897"///\n"
50898"/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.\n"
50899"///\n"
50900"/// \\param __a\n"
50901"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
50902"/// \\param __b\n"
50903"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
50904"/// \\returns A 128-bit vector of [4 x float] containing the minimum values\n"
50905"/// between both operands.\n"
50906"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50907"_mm_min_ps(__m128 __a, __m128 __b)\n"
50908"{\n"
50909" return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);\n"
50910"}\n"
50911"\n"
50912"/// Compares two 32-bit float values in the low-order bits of both\n"
50913"/// operands and returns the greater value in the low-order bits of a 128-bit\n"
50914"/// vector of [4 x float].\n"
50915"///\n"
50916"/// \\headerfile <x86intrin.h>\n"
50917"///\n"
50918"/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.\n"
50919"///\n"
50920"/// \\param __a\n"
50921"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
50922"/// 32 bits of this operand are used in the comparison.\n"
50923"/// \\param __b\n"
50924"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
50925"/// 32 bits of this operand are used in the comparison.\n"
50926"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
50927"/// maximum value between both operands. The upper 96 bits are copied from\n"
50928"/// the upper 96 bits of the first source operand.\n"
50929"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50930"_mm_max_ss(__m128 __a, __m128 __b)\n"
50931"{\n"
50932" return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);\n"
50933"}\n"
50934"\n"
50935"/// Compares two 128-bit vectors of [4 x float] and returns the greater\n"
50936"/// of each pair of values.\n"
50937"///\n"
50938"/// \\headerfile <x86intrin.h>\n"
50939"///\n"
50940"/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.\n"
50941"///\n"
50942"/// \\param __a\n"
50943"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
50944"/// \\param __b\n"
50945"/// A 128-bit vector of [4 x float] containing one of the operands.\n"
50946"/// \\returns A 128-bit vector of [4 x float] containing the maximum values\n"
50947"/// between both operands.\n"
50948"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50949"_mm_max_ps(__m128 __a, __m128 __b)\n"
50950"{\n"
50951" return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);\n"
50952"}\n"
50953"\n"
50954"/// Performs a bitwise AND of two 128-bit vectors of [4 x float].\n"
50955"///\n"
50956"/// \\headerfile <x86intrin.h>\n"
50957"///\n"
50958"/// This intrinsic corresponds to the <c> VANDPS / ANDPS </c> instructions.\n"
50959"///\n"
50960"/// \\param __a\n"
50961"/// A 128-bit vector containing one of the source operands.\n"
50962"/// \\param __b\n"
50963"/// A 128-bit vector containing one of the source operands.\n"
50964"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
50965"/// values between both operands.\n"
50966"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50967"_mm_and_ps(__m128 __a, __m128 __b)\n"
50968"{\n"
50969" return (__m128)((__v4su)__a & (__v4su)__b);\n"
50970"}\n"
50971"\n"
50972"/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using\n"
50973"/// the one's complement of the values contained in the first source\n"
50974"/// operand.\n"
50975"///\n"
50976"/// \\headerfile <x86intrin.h>\n"
50977"///\n"
50978"/// This intrinsic corresponds to the <c> VANDNPS / ANDNPS </c> instructions.\n"
50979"///\n"
50980"/// \\param __a\n"
50981"/// A 128-bit vector of [4 x float] containing the first source operand. The\n"
50982"/// one's complement of this value is used in the bitwise AND.\n"
50983"/// \\param __b\n"
50984"/// A 128-bit vector of [4 x float] containing the second source operand.\n"
50985"/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n"
50986"/// one's complement of the first operand and the values in the second\n"
50987"/// operand.\n"
50988"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
50989"_mm_andnot_ps(__m128 __a, __m128 __b)\n"
50990"{\n"
50991" return (__m128)(~(__v4su)__a & (__v4su)__b);\n"
50992"}\n"
50993"\n"
50994"/// Performs a bitwise OR of two 128-bit vectors of [4 x float].\n"
50995"///\n"
50996"/// \\headerfile <x86intrin.h>\n"
50997"///\n"
50998"/// This intrinsic corresponds to the <c> VORPS / ORPS </c> instructions.\n"
50999"///\n"
51000"/// \\param __a\n"
51001"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
51002"/// \\param __b\n"
51003"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
51004"/// \\returns A 128-bit vector of [4 x float] containing the bitwise OR of the\n"
51005"/// values between both operands.\n"
51006"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51007"_mm_or_ps(__m128 __a, __m128 __b)\n"
51008"{\n"
51009" return (__m128)((__v4su)__a | (__v4su)__b);\n"
51010"}\n"
51011"\n"
51012"/// Performs a bitwise exclusive OR of two 128-bit vectors of\n"
51013"/// [4 x float].\n"
51014"///\n"
51015"/// \\headerfile <x86intrin.h>\n"
51016"///\n"
51017"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instructions.\n"
51018"///\n"
51019"/// \\param __a\n"
51020"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
51021"/// \\param __b\n"
51022"/// A 128-bit vector of [4 x float] containing one of the source operands.\n"
51023"/// \\returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR\n"
51024"/// of the values between both operands.\n"
51025"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51026"_mm_xor_ps(__m128 __a, __m128 __b)\n"
51027"{\n"
51028" return (__m128)((__v4su)__a ^ (__v4su)__b);\n"
51029"}\n"
51030"\n"
51031"/// Compares two 32-bit float values in the low-order bits of both\n"
51032"/// operands for equality and returns the result of the comparison in the\n"
51033"/// low-order bits of a vector [4 x float].\n"
51034"///\n"
51035"/// \\headerfile <x86intrin.h>\n"
51036"///\n"
51037"/// This intrinsic corresponds to the <c> VCMPEQSS / CMPEQSS </c> instructions.\n"
51038"///\n"
51039"/// \\param __a\n"
51040"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51041"/// 32 bits of this operand are used in the comparison.\n"
51042"/// \\param __b\n"
51043"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51044"/// 32 bits of this operand are used in the comparison.\n"
51045"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51046"/// in the low-order bits.\n"
51047"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51048"_mm_cmpeq_ss(__m128 __a, __m128 __b)\n"
51049"{\n"
51050" return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);\n"
51051"}\n"
51052"\n"
51053"/// Compares each of the corresponding 32-bit float values of the\n"
51054"/// 128-bit vectors of [4 x float] for equality.\n"
51055"///\n"
51056"/// \\headerfile <x86intrin.h>\n"
51057"///\n"
51058"/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.\n"
51059"///\n"
51060"/// \\param __a\n"
51061"/// A 128-bit vector of [4 x float].\n"
51062"/// \\param __b\n"
51063"/// A 128-bit vector of [4 x float].\n"
51064"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51065"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51066"_mm_cmpeq_ps(__m128 __a, __m128 __b)\n"
51067"{\n"
51068" return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);\n"
51069"}\n"
51070"\n"
51071"/// Compares two 32-bit float values in the low-order bits of both\n"
51072"/// operands to determine if the value in the first operand is less than the\n"
51073"/// corresponding value in the second operand and returns the result of the\n"
51074"/// comparison in the low-order bits of a vector of [4 x float].\n"
51075"///\n"
51076"/// \\headerfile <x86intrin.h>\n"
51077"///\n"
51078"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
51079"///\n"
51080"/// \\param __a\n"
51081"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51082"/// 32 bits of this operand are used in the comparison.\n"
51083"/// \\param __b\n"
51084"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51085"/// 32 bits of this operand are used in the comparison.\n"
51086"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51087"/// in the low-order bits.\n"
51088"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51089"_mm_cmplt_ss(__m128 __a, __m128 __b)\n"
51090"{\n"
51091" return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);\n"
51092"}\n"
51093"\n"
51094"/// Compares each of the corresponding 32-bit float values of the\n"
51095"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51096"/// operand are less than those in the second operand.\n"
51097"///\n"
51098"/// \\headerfile <x86intrin.h>\n"
51099"///\n"
51100"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
51101"///\n"
51102"/// \\param __a\n"
51103"/// A 128-bit vector of [4 x float].\n"
51104"/// \\param __b\n"
51105"/// A 128-bit vector of [4 x float].\n"
51106"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51107"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51108"_mm_cmplt_ps(__m128 __a, __m128 __b)\n"
51109"{\n"
51110" return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);\n"
51111"}\n"
51112"\n"
51113"/// Compares two 32-bit float values in the low-order bits of both\n"
51114"/// operands to determine if the value in the first operand is less than or\n"
51115"/// equal to the corresponding value in the second operand and returns the\n"
51116"/// result of the comparison in the low-order bits of a vector of\n"
51117"/// [4 x float].\n"
51118"///\n"
51119"/// \\headerfile <x86intrin.h>\n"
51120"///\n"
51121"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
51122"///\n"
51123"/// \\param __a\n"
51124"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51125"/// 32 bits of this operand are used in the comparison.\n"
51126"/// \\param __b\n"
51127"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51128"/// 32 bits of this operand are used in the comparison.\n"
51129"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51130"/// in the low-order bits.\n"
51131"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51132"_mm_cmple_ss(__m128 __a, __m128 __b)\n"
51133"{\n"
51134" return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);\n"
51135"}\n"
51136"\n"
51137"/// Compares each of the corresponding 32-bit float values of the\n"
51138"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51139"/// operand are less than or equal to those in the second operand.\n"
51140"///\n"
51141"/// \\headerfile <x86intrin.h>\n"
51142"///\n"
51143"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
51144"///\n"
51145"/// \\param __a\n"
51146"/// A 128-bit vector of [4 x float].\n"
51147"/// \\param __b\n"
51148"/// A 128-bit vector of [4 x float].\n"
51149"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51150"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51151"_mm_cmple_ps(__m128 __a, __m128 __b)\n"
51152"{\n"
51153" return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);\n"
51154"}\n"
51155"\n"
51156"/// Compares two 32-bit float values in the low-order bits of both\n"
51157"/// operands to determine if the value in the first operand is greater than\n"
51158"/// the corresponding value in the second operand and returns the result of\n"
51159"/// the comparison in the low-order bits of a vector of [4 x float].\n"
51160"///\n"
51161"/// \\headerfile <x86intrin.h>\n"
51162"///\n"
51163"/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n"
51164"///\n"
51165"/// \\param __a\n"
51166"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51167"/// 32 bits of this operand are used in the comparison.\n"
51168"/// \\param __b\n"
51169"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51170"/// 32 bits of this operand are used in the comparison.\n"
51171"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51172"/// in the low-order bits.\n"
51173"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51174"_mm_cmpgt_ss(__m128 __a, __m128 __b)\n"
51175"{\n"
51176" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
51177" (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),\n"
51178" 4, 1, 2, 3);\n"
51179"}\n"
51180"\n"
51181"/// Compares each of the corresponding 32-bit float values of the\n"
51182"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51183"/// operand are greater than those in the second operand.\n"
51184"///\n"
51185"/// \\headerfile <x86intrin.h>\n"
51186"///\n"
51187"/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n"
51188"///\n"
51189"/// \\param __a\n"
51190"/// A 128-bit vector of [4 x float].\n"
51191"/// \\param __b\n"
51192"/// A 128-bit vector of [4 x float].\n"
51193"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51194"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51195"_mm_cmpgt_ps(__m128 __a, __m128 __b)\n"
51196"{\n"
51197" return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);\n"
51198"}\n"
51199"\n"
51200"/// Compares two 32-bit float values in the low-order bits of both\n"
51201"/// operands to determine if the value in the first operand is greater than\n"
51202"/// or equal to the corresponding value in the second operand and returns\n"
51203"/// the result of the comparison in the low-order bits of a vector of\n"
51204"/// [4 x float].\n"
51205"///\n"
51206"/// \\headerfile <x86intrin.h>\n"
51207"///\n"
51208"/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n"
51209"///\n"
51210"/// \\param __a\n"
51211"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51212"/// 32 bits of this operand are used in the comparison.\n"
51213"/// \\param __b\n"
51214"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51215"/// 32 bits of this operand are used in the comparison.\n"
51216"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51217"/// in the low-order bits.\n"
51218"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51219"_mm_cmpge_ss(__m128 __a, __m128 __b)\n"
51220"{\n"
51221" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
51222" (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),\n"
51223" 4, 1, 2, 3);\n"
51224"}\n"
51225"\n"
51226"/// Compares each of the corresponding 32-bit float values of the\n"
51227"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51228"/// operand are greater than or equal to those in the second operand.\n"
51229"///\n"
51230"/// \\headerfile <x86intrin.h>\n"
51231"///\n"
51232"/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n"
51233"///\n"
51234"/// \\param __a\n"
51235"/// A 128-bit vector of [4 x float].\n"
51236"/// \\param __b\n"
51237"/// A 128-bit vector of [4 x float].\n"
51238"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51239"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51240"_mm_cmpge_ps(__m128 __a, __m128 __b)\n"
51241"{\n"
51242" return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);\n"
51243"}\n"
51244"\n"
51245"/// Compares two 32-bit float values in the low-order bits of both\n"
51246"/// operands for inequality and returns the result of the comparison in the\n"
51247"/// low-order bits of a vector of [4 x float].\n"
51248"///\n"
51249"/// \\headerfile <x86intrin.h>\n"
51250"///\n"
51251"/// This intrinsic corresponds to the <c> VCMPNEQSS / CMPNEQSS </c>\n"
51252"/// instructions.\n"
51253"///\n"
51254"/// \\param __a\n"
51255"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51256"/// 32 bits of this operand are used in the comparison.\n"
51257"/// \\param __b\n"
51258"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51259"/// 32 bits of this operand are used in the comparison.\n"
51260"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51261"/// in the low-order bits.\n"
51262"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51263"_mm_cmpneq_ss(__m128 __a, __m128 __b)\n"
51264"{\n"
51265" return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);\n"
51266"}\n"
51267"\n"
51268"/// Compares each of the corresponding 32-bit float values of the\n"
51269"/// 128-bit vectors of [4 x float] for inequality.\n"
51270"///\n"
51271"/// \\headerfile <x86intrin.h>\n"
51272"///\n"
51273"/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>\n"
51274"/// instructions.\n"
51275"///\n"
51276"/// \\param __a\n"
51277"/// A 128-bit vector of [4 x float].\n"
51278"/// \\param __b\n"
51279"/// A 128-bit vector of [4 x float].\n"
51280"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51281"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51282"_mm_cmpneq_ps(__m128 __a, __m128 __b)\n"
51283"{\n"
51284" return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);\n"
51285"}\n"
51286"\n"
51287"/// Compares two 32-bit float values in the low-order bits of both\n"
51288"/// operands to determine if the value in the first operand is not less than\n"
51289"/// the corresponding value in the second operand and returns the result of\n"
51290"/// the comparison in the low-order bits of a vector of [4 x float].\n"
51291"///\n"
51292"/// \\headerfile <x86intrin.h>\n"
51293"///\n"
51294"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
51295"/// instructions.\n"
51296"///\n"
51297"/// \\param __a\n"
51298"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51299"/// 32 bits of this operand are used in the comparison.\n"
51300"/// \\param __b\n"
51301"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51302"/// 32 bits of this operand are used in the comparison.\n"
51303"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51304"/// in the low-order bits.\n"
51305"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51306"_mm_cmpnlt_ss(__m128 __a, __m128 __b)\n"
51307"{\n"
51308" return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);\n"
51309"}\n"
51310"\n"
51311"/// Compares each of the corresponding 32-bit float values of the\n"
51312"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51313"/// operand are not less than those in the second operand.\n"
51314"///\n"
51315"/// \\headerfile <x86intrin.h>\n"
51316"///\n"
51317"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
51318"/// instructions.\n"
51319"///\n"
51320"/// \\param __a\n"
51321"/// A 128-bit vector of [4 x float].\n"
51322"/// \\param __b\n"
51323"/// A 128-bit vector of [4 x float].\n"
51324"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51325"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51326"_mm_cmpnlt_ps(__m128 __a, __m128 __b)\n"
51327"{\n"
51328" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);\n"
51329"}\n"
51330"\n"
51331"/// Compares two 32-bit float values in the low-order bits of both\n"
51332"/// operands to determine if the value in the first operand is not less than\n"
51333"/// or equal to the corresponding value in the second operand and returns\n"
51334"/// the result of the comparison in the low-order bits of a vector of\n"
51335"/// [4 x float].\n"
51336"///\n"
51337"/// \\headerfile <x86intrin.h>\n"
51338"///\n"
51339"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
51340"/// instructions.\n"
51341"///\n"
51342"/// \\param __a\n"
51343"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51344"/// 32 bits of this operand are used in the comparison.\n"
51345"/// \\param __b\n"
51346"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51347"/// 32 bits of this operand are used in the comparison.\n"
51348"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51349"/// in the low-order bits.\n"
51350"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51351"_mm_cmpnle_ss(__m128 __a, __m128 __b)\n"
51352"{\n"
51353" return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);\n"
51354"}\n"
51355"\n"
51356"/// Compares each of the corresponding 32-bit float values of the\n"
51357"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51358"/// operand are not less than or equal to those in the second operand.\n"
51359"///\n"
51360"/// \\headerfile <x86intrin.h>\n"
51361"///\n"
51362"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
51363"/// instructions.\n"
51364"///\n"
51365"/// \\param __a\n"
51366"/// A 128-bit vector of [4 x float].\n"
51367"/// \\param __b\n"
51368"/// A 128-bit vector of [4 x float].\n"
51369"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51370"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51371"_mm_cmpnle_ps(__m128 __a, __m128 __b)\n"
51372"{\n"
51373" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);\n"
51374"}\n"
51375"\n"
51376"/// Compares two 32-bit float values in the low-order bits of both\n"
51377"/// operands to determine if the value in the first operand is not greater\n"
51378"/// than the corresponding value in the second operand and returns the\n"
51379"/// result of the comparison in the low-order bits of a vector of\n"
51380"/// [4 x float].\n"
51381"///\n"
51382"/// \\headerfile <x86intrin.h>\n"
51383"///\n"
51384"/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n"
51385"/// instructions.\n"
51386"///\n"
51387"/// \\param __a\n"
51388"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51389"/// 32 bits of this operand are used in the comparison.\n"
51390"/// \\param __b\n"
51391"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51392"/// 32 bits of this operand are used in the comparison.\n"
51393"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51394"/// in the low-order bits.\n"
51395"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51396"_mm_cmpngt_ss(__m128 __a, __m128 __b)\n"
51397"{\n"
51398" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
51399" (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),\n"
51400" 4, 1, 2, 3);\n"
51401"}\n"
51402"\n"
51403"/// Compares each of the corresponding 32-bit float values of the\n"
51404"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51405"/// operand are not greater than those in the second operand.\n"
51406"///\n"
51407"/// \\headerfile <x86intrin.h>\n"
51408"///\n"
51409"/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n"
51410"/// instructions.\n"
51411"///\n"
51412"/// \\param __a\n"
51413"/// A 128-bit vector of [4 x float].\n"
51414"/// \\param __b\n"
51415"/// A 128-bit vector of [4 x float].\n"
51416"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51417"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51418"_mm_cmpngt_ps(__m128 __a, __m128 __b)\n"
51419"{\n"
51420" return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);\n"
51421"}\n"
51422"\n"
51423"/// Compares two 32-bit float values in the low-order bits of both\n"
51424"/// operands to determine if the value in the first operand is not greater\n"
51425"/// than or equal to the corresponding value in the second operand and\n"
51426"/// returns the result of the comparison in the low-order bits of a vector\n"
51427"/// of [4 x float].\n"
51428"///\n"
51429"/// \\headerfile <x86intrin.h>\n"
51430"///\n"
51431"/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n"
51432"/// instructions.\n"
51433"///\n"
51434"/// \\param __a\n"
51435"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51436"/// 32 bits of this operand are used in the comparison.\n"
51437"/// \\param __b\n"
51438"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51439"/// 32 bits of this operand are used in the comparison.\n"
51440"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51441"/// in the low-order bits.\n"
51442"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51443"_mm_cmpnge_ss(__m128 __a, __m128 __b)\n"
51444"{\n"
51445" return (__m128)__builtin_shufflevector((__v4sf)__a,\n"
51446" (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),\n"
51447" 4, 1, 2, 3);\n"
51448"}\n"
51449"\n"
51450"/// Compares each of the corresponding 32-bit float values of the\n"
51451"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51452"/// operand are not greater than or equal to those in the second operand.\n"
51453"///\n"
51454"/// \\headerfile <x86intrin.h>\n"
51455"///\n"
51456"/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n"
51457"/// instructions.\n"
51458"///\n"
51459"/// \\param __a\n"
51460"/// A 128-bit vector of [4 x float].\n"
51461"/// \\param __b\n"
51462"/// A 128-bit vector of [4 x float].\n"
51463"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51464"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51465"_mm_cmpnge_ps(__m128 __a, __m128 __b)\n"
51466"{\n"
51467" return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);\n"
51468"}\n"
51469"\n"
51470"/// Compares two 32-bit float values in the low-order bits of both\n"
51471"/// operands to determine if the value in the first operand is ordered with\n"
51472"/// respect to the corresponding value in the second operand and returns the\n"
51473"/// result of the comparison in the low-order bits of a vector of\n"
51474"/// [4 x float].\n"
51475"///\n"
51476"/// \\headerfile <x86intrin.h>\n"
51477"///\n"
51478"/// This intrinsic corresponds to the <c> VCMPORDSS / CMPORDSS </c>\n"
51479"/// instructions.\n"
51480"///\n"
51481"/// \\param __a\n"
51482"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51483"/// 32 bits of this operand are used in the comparison.\n"
51484"/// \\param __b\n"
51485"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51486"/// 32 bits of this operand are used in the comparison.\n"
51487"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51488"/// in the low-order bits.\n"
51489"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51490"_mm_cmpord_ss(__m128 __a, __m128 __b)\n"
51491"{\n"
51492" return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);\n"
51493"}\n"
51494"\n"
51495"/// Compares each of the corresponding 32-bit float values of the\n"
51496"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51497"/// operand are ordered with respect to those in the second operand.\n"
51498"///\n"
51499"/// \\headerfile <x86intrin.h>\n"
51500"///\n"
51501"/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>\n"
51502"/// instructions.\n"
51503"///\n"
51504"/// \\param __a\n"
51505"/// A 128-bit vector of [4 x float].\n"
51506"/// \\param __b\n"
51507"/// A 128-bit vector of [4 x float].\n"
51508"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51509"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51510"_mm_cmpord_ps(__m128 __a, __m128 __b)\n"
51511"{\n"
51512" return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);\n"
51513"}\n"
51514"\n"
51515"/// Compares two 32-bit float values in the low-order bits of both\n"
51516"/// operands to determine if the value in the first operand is unordered\n"
51517"/// with respect to the corresponding value in the second operand and\n"
51518"/// returns the result of the comparison in the low-order bits of a vector\n"
51519"/// of [4 x float].\n"
51520"///\n"
51521"/// \\headerfile <x86intrin.h>\n"
51522"///\n"
51523"/// This intrinsic corresponds to the <c> VCMPUNORDSS / CMPUNORDSS </c>\n"
51524"/// instructions.\n"
51525"///\n"
51526"/// \\param __a\n"
51527"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51528"/// 32 bits of this operand are used in the comparison.\n"
51529"/// \\param __b\n"
51530"/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n"
51531"/// 32 bits of this operand are used in the comparison.\n"
51532"/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n"
51533"/// in the low-order bits.\n"
51534"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51535"_mm_cmpunord_ss(__m128 __a, __m128 __b)\n"
51536"{\n"
51537" return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);\n"
51538"}\n"
51539"\n"
51540"/// Compares each of the corresponding 32-bit float values of the\n"
51541"/// 128-bit vectors of [4 x float] to determine if the values in the first\n"
51542"/// operand are unordered with respect to those in the second operand.\n"
51543"///\n"
51544"/// \\headerfile <x86intrin.h>\n"
51545"///\n"
51546"/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>\n"
51547"/// instructions.\n"
51548"///\n"
51549"/// \\param __a\n"
51550"/// A 128-bit vector of [4 x float].\n"
51551"/// \\param __b\n"
51552"/// A 128-bit vector of [4 x float].\n"
51553"/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n"
51554"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
51555"_mm_cmpunord_ps(__m128 __a, __m128 __b)\n"
51556"{\n"
51557" return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);\n"
51558"}\n"
51559"\n"
51560"/// Compares two 32-bit float values in the low-order bits of both\n"
51561"/// operands for equality and returns the result of the comparison.\n"
51562"///\n"
51563"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51564"///\n"
51565"/// \\headerfile <x86intrin.h>\n"
51566"///\n"
51567"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
51568"/// instructions.\n"
51569"///\n"
51570"/// \\param __a\n"
51571"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51572"/// used in the comparison.\n"
51573"/// \\param __b\n"
51574"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51575"/// used in the comparison.\n"
51576"/// \\returns An integer containing the comparison results. If either of the\n"
51577"/// two lower 32-bit values is NaN, 0 is returned.\n"
51578"static __inline__ int __DEFAULT_FN_ATTRS\n"
51579"_mm_comieq_ss(__m128 __a, __m128 __b)\n"
51580"{\n"
51581" return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);\n"
51582"}\n"
51583"\n"
51584"/// Compares two 32-bit float values in the low-order bits of both\n"
51585"/// operands to determine if the first operand is less than the second\n"
51586"/// operand and returns the result of the comparison.\n"
51587"///\n"
51588"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51589"///\n"
51590"/// \\headerfile <x86intrin.h>\n"
51591"///\n"
51592"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n"
51593"/// instructions.\n"
51594"///\n"
51595"/// \\param __a\n"
51596"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51597"/// used in the comparison.\n"
51598"/// \\param __b\n"
51599"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51600"/// used in the comparison.\n"
51601"/// \\returns An integer containing the comparison results. If either of the two\n"
51602"/// lower 32-bit values is NaN, 0 is returned.\n"
51603"static __inline__ int __DEFAULT_FN_ATTRS\n"
51604"_mm_comilt_ss(__m128 __a, __m128 __b)\n"
51605"{\n"
51606" return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);\n"
51607"}\n"
51608"\n"
51609"/// Compares two 32-bit float values in the low-order bits of both\n"
51610"/// operands to determine if the first operand is less than or equal to the\n"
51611"/// second operand and returns the result of the comparison.\n"
51612"///\n"
51613"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51614"///\n"
51615"/// \\headerfile <x86intrin.h>\n"
51616"///\n"
51617"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
51618"///\n"
51619"/// \\param __a\n"
51620"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51621"/// used in the comparison.\n"
51622"/// \\param __b\n"
51623"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51624"/// used in the comparison.\n"
51625"/// \\returns An integer containing the comparison results. If either of the two\n"
51626"/// lower 32-bit values is NaN, 0 is returned.\n"
51627"static __inline__ int __DEFAULT_FN_ATTRS\n"
51628"_mm_comile_ss(__m128 __a, __m128 __b)\n"
51629"{\n"
51630" return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);\n"
51631"}\n"
51632"\n"
51633"/// Compares two 32-bit float values in the low-order bits of both\n"
51634"/// operands to determine if the first operand is greater than the second\n"
51635"/// operand and returns the result of the comparison.\n"
51636"///\n"
51637"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51638"///\n"
51639"/// \\headerfile <x86intrin.h>\n"
51640"///\n"
51641"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
51642"///\n"
51643"/// \\param __a\n"
51644"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51645"/// used in the comparison.\n"
51646"/// \\param __b\n"
51647"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51648"/// used in the comparison.\n"
51649"/// \\returns An integer containing the comparison results. If either of the\n"
51650"/// two lower 32-bit values is NaN, 0 is returned.\n"
51651"static __inline__ int __DEFAULT_FN_ATTRS\n"
51652"_mm_comigt_ss(__m128 __a, __m128 __b)\n"
51653"{\n"
51654" return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);\n"
51655"}\n"
51656"\n"
51657"/// Compares two 32-bit float values in the low-order bits of both\n"
51658"/// operands to determine if the first operand is greater than or equal to\n"
51659"/// the second operand and returns the result of the comparison.\n"
51660"///\n"
51661"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51662"///\n"
51663"/// \\headerfile <x86intrin.h>\n"
51664"///\n"
51665"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
51666"///\n"
51667"/// \\param __a\n"
51668"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51669"/// used in the comparison.\n"
51670"/// \\param __b\n"
51671"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51672"/// used in the comparison.\n"
51673"/// \\returns An integer containing the comparison results. If either of the two\n"
51674"/// lower 32-bit values is NaN, 0 is returned.\n"
51675"static __inline__ int __DEFAULT_FN_ATTRS\n"
51676"_mm_comige_ss(__m128 __a, __m128 __b)\n"
51677"{\n"
51678" return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);\n"
51679"}\n"
51680"\n"
51681"/// Compares two 32-bit float values in the low-order bits of both\n"
51682"/// operands to determine if the first operand is not equal to the second\n"
51683"/// operand and returns the result of the comparison.\n"
51684"///\n"
51685"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
51686"///\n"
51687"/// \\headerfile <x86intrin.h>\n"
51688"///\n"
51689"/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n"
51690"///\n"
51691"/// \\param __a\n"
51692"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51693"/// used in the comparison.\n"
51694"/// \\param __b\n"
51695"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51696"/// used in the comparison.\n"
51697"/// \\returns An integer containing the comparison results. If either of the\n"
51698"/// two lower 32-bit values is NaN, 1 is returned.\n"
51699"static __inline__ int __DEFAULT_FN_ATTRS\n"
51700"_mm_comineq_ss(__m128 __a, __m128 __b)\n"
51701"{\n"
51702" return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);\n"
51703"}\n"
51704"\n"
51705"/// Performs an unordered comparison of two 32-bit float values using\n"
51706"/// the low-order bits of both operands to determine equality and returns\n"
51707"/// the result of the comparison.\n"
51708"///\n"
51709"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51710"///\n"
51711"/// \\headerfile <x86intrin.h>\n"
51712"///\n"
51713"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51714"///\n"
51715"/// \\param __a\n"
51716"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51717"/// used in the comparison.\n"
51718"/// \\param __b\n"
51719"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51720"/// used in the comparison.\n"
51721"/// \\returns An integer containing the comparison results. If either of the two\n"
51722"/// lower 32-bit values is NaN, 0 is returned.\n"
51723"static __inline__ int __DEFAULT_FN_ATTRS\n"
51724"_mm_ucomieq_ss(__m128 __a, __m128 __b)\n"
51725"{\n"
51726" return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);\n"
51727"}\n"
51728"\n"
51729"/// Performs an unordered comparison of two 32-bit float values using\n"
51730"/// the low-order bits of both operands to determine if the first operand is\n"
51731"/// less than the second operand and returns the result of the comparison.\n"
51732"///\n"
51733"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51734"///\n"
51735"/// \\headerfile <x86intrin.h>\n"
51736"///\n"
51737"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51738"///\n"
51739"/// \\param __a\n"
51740"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51741"/// used in the comparison.\n"
51742"/// \\param __b\n"
51743"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51744"/// used in the comparison.\n"
51745"/// \\returns An integer containing the comparison results. If either of the two\n"
51746"/// lower 32-bit values is NaN, 0 is returned.\n"
51747"static __inline__ int __DEFAULT_FN_ATTRS\n"
51748"_mm_ucomilt_ss(__m128 __a, __m128 __b)\n"
51749"{\n"
51750" return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);\n"
51751"}\n"
51752"\n"
51753"/// Performs an unordered comparison of two 32-bit float values using\n"
51754"/// the low-order bits of both operands to determine if the first operand is\n"
51755"/// less than or equal to the second operand and returns the result of the\n"
51756"/// comparison.\n"
51757"///\n"
51758"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51759"///\n"
51760"/// \\headerfile <x86intrin.h>\n"
51761"///\n"
51762"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51763"///\n"
51764"/// \\param __a\n"
51765"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51766"/// used in the comparison.\n"
51767"/// \\param __b\n"
51768"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51769"/// used in the comparison.\n"
51770"/// \\returns An integer containing the comparison results. If either of the two\n"
51771"/// lower 32-bit values is NaN, 0 is returned.\n"
51772"static __inline__ int __DEFAULT_FN_ATTRS\n"
51773"_mm_ucomile_ss(__m128 __a, __m128 __b)\n"
51774"{\n"
51775" return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);\n"
51776"}\n"
51777"\n"
51778"/// Performs an unordered comparison of two 32-bit float values using\n"
51779"/// the low-order bits of both operands to determine if the first operand is\n"
51780"/// greater than the second operand and returns the result of the\n"
51781"/// comparison.\n"
51782"///\n"
51783"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51784"///\n"
51785"/// \\headerfile <x86intrin.h>\n"
51786"///\n"
51787"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51788"///\n"
51789"/// \\param __a\n"
51790"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51791"/// used in the comparison.\n"
51792"/// \\param __b\n"
51793"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51794"/// used in the comparison.\n"
51795"/// \\returns An integer containing the comparison results. If either of the two\n"
51796"/// lower 32-bit values is NaN, 0 is returned.\n"
51797"static __inline__ int __DEFAULT_FN_ATTRS\n"
51798"_mm_ucomigt_ss(__m128 __a, __m128 __b)\n"
51799"{\n"
51800" return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);\n"
51801"}\n"
51802"\n"
51803"/// Performs an unordered comparison of two 32-bit float values using\n"
51804"/// the low-order bits of both operands to determine if the first operand is\n"
51805"/// greater than or equal to the second operand and returns the result of\n"
51806"/// the comparison.\n"
51807"///\n"
51808"/// If either of the two lower 32-bit values is NaN, 0 is returned.\n"
51809"///\n"
51810"/// \\headerfile <x86intrin.h>\n"
51811"///\n"
51812"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51813"///\n"
51814"/// \\param __a\n"
51815"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51816"/// used in the comparison.\n"
51817"/// \\param __b\n"
51818"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51819"/// used in the comparison.\n"
51820"/// \\returns An integer containing the comparison results. If either of the two\n"
51821"/// lower 32-bit values is NaN, 0 is returned.\n"
51822"static __inline__ int __DEFAULT_FN_ATTRS\n"
51823"_mm_ucomige_ss(__m128 __a, __m128 __b)\n"
51824"{\n"
51825" return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);\n"
51826"}\n"
51827"\n"
51828"/// Performs an unordered comparison of two 32-bit float values using\n"
51829"/// the low-order bits of both operands to determine inequality and returns\n"
51830"/// the result of the comparison.\n"
51831"///\n"
51832"/// If either of the two lower 32-bit values is NaN, 1 is returned.\n"
51833"///\n"
51834"/// \\headerfile <x86intrin.h>\n"
51835"///\n"
51836"/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n"
51837"///\n"
51838"/// \\param __a\n"
51839"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51840"/// used in the comparison.\n"
51841"/// \\param __b\n"
51842"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51843"/// used in the comparison.\n"
51844"/// \\returns An integer containing the comparison results. If either of the two\n"
51845"/// lower 32-bit values is NaN, 1 is returned.\n"
51846"static __inline__ int __DEFAULT_FN_ATTRS\n"
51847"_mm_ucomineq_ss(__m128 __a, __m128 __b)\n"
51848"{\n"
51849" return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);\n"
51850"}\n"
51851"\n"
51852"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51853"/// [4 x float] into a 32-bit integer.\n"
51854"///\n"
51855"/// \\headerfile <x86intrin.h>\n"
51856"///\n"
51857"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
51858"/// instructions.\n"
51859"///\n"
51860"/// \\param __a\n"
51861"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51862"/// used in the conversion.\n"
51863"/// \\returns A 32-bit integer containing the converted value.\n"
51864"static __inline__ int __DEFAULT_FN_ATTRS\n"
51865"_mm_cvtss_si32(__m128 __a)\n"
51866"{\n"
51867" return __builtin_ia32_cvtss2si((__v4sf)__a);\n"
51868"}\n"
51869"\n"
51870"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51871"/// [4 x float] into a 32-bit integer.\n"
51872"///\n"
51873"/// \\headerfile <x86intrin.h>\n"
51874"///\n"
51875"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
51876"/// instructions.\n"
51877"///\n"
51878"/// \\param __a\n"
51879"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51880"/// used in the conversion.\n"
51881"/// \\returns A 32-bit integer containing the converted value.\n"
51882"static __inline__ int __DEFAULT_FN_ATTRS\n"
51883"_mm_cvt_ss2si(__m128 __a)\n"
51884"{\n"
51885" return _mm_cvtss_si32(__a);\n"
51886"}\n"
51887"\n"
51888"#ifdef __x86_64__\n"
51889"\n"
51890"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51891"/// [4 x float] into a 64-bit integer.\n"
51892"///\n"
51893"/// \\headerfile <x86intrin.h>\n"
51894"///\n"
51895"/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n"
51896"/// instructions.\n"
51897"///\n"
51898"/// \\param __a\n"
51899"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51900"/// used in the conversion.\n"
51901"/// \\returns A 64-bit integer containing the converted value.\n"
51902"static __inline__ long long __DEFAULT_FN_ATTRS\n"
51903"_mm_cvtss_si64(__m128 __a)\n"
51904"{\n"
51905" return __builtin_ia32_cvtss2si64((__v4sf)__a);\n"
51906"}\n"
51907"\n"
51908"#endif\n"
51909"\n"
51910"/// Converts two low-order float values in a 128-bit vector of\n"
51911"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
51912"///\n"
51913"/// \\headerfile <x86intrin.h>\n"
51914"///\n"
51915"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
51916"///\n"
51917"/// \\param __a\n"
51918"/// A 128-bit vector of [4 x float].\n"
51919"/// \\returns A 64-bit integer vector containing the converted values.\n"
51920"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51921"_mm_cvtps_pi32(__m128 __a)\n"
51922"{\n"
51923" return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);\n"
51924"}\n"
51925"\n"
51926"/// Converts two low-order float values in a 128-bit vector of\n"
51927"/// [4 x float] into a 64-bit vector of [2 x i32].\n"
51928"///\n"
51929"/// \\headerfile <x86intrin.h>\n"
51930"///\n"
51931"/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n"
51932"///\n"
51933"/// \\param __a\n"
51934"/// A 128-bit vector of [4 x float].\n"
51935"/// \\returns A 64-bit integer vector containing the converted values.\n"
51936"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
51937"_mm_cvt_ps2pi(__m128 __a)\n"
51938"{\n"
51939" return _mm_cvtps_pi32(__a);\n"
51940"}\n"
51941"\n"
51942"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51943"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
51944"/// inexact.\n"
51945"///\n"
51946"/// \\headerfile <x86intrin.h>\n"
51947"///\n"
51948"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
51949"/// instructions.\n"
51950"///\n"
51951"/// \\param __a\n"
51952"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51953"/// used in the conversion.\n"
51954"/// \\returns A 32-bit integer containing the converted value.\n"
51955"static __inline__ int __DEFAULT_FN_ATTRS\n"
51956"_mm_cvttss_si32(__m128 __a)\n"
51957"{\n"
51958" return __builtin_ia32_cvttss2si((__v4sf)__a);\n"
51959"}\n"
51960"\n"
51961"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51962"/// [4 x float] into a 32-bit integer, truncating the result when it is\n"
51963"/// inexact.\n"
51964"///\n"
51965"/// \\headerfile <x86intrin.h>\n"
51966"///\n"
51967"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
51968"/// instructions.\n"
51969"///\n"
51970"/// \\param __a\n"
51971"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51972"/// used in the conversion.\n"
51973"/// \\returns A 32-bit integer containing the converted value.\n"
51974"static __inline__ int __DEFAULT_FN_ATTRS\n"
51975"_mm_cvtt_ss2si(__m128 __a)\n"
51976"{\n"
51977" return _mm_cvttss_si32(__a);\n"
51978"}\n"
51979"\n"
51980"#ifdef __x86_64__\n"
51981"/// Converts a float value contained in the lower 32 bits of a vector of\n"
51982"/// [4 x float] into a 64-bit integer, truncating the result when it is\n"
51983"/// inexact.\n"
51984"///\n"
51985"/// \\headerfile <x86intrin.h>\n"
51986"///\n"
51987"/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n"
51988"/// instructions.\n"
51989"///\n"
51990"/// \\param __a\n"
51991"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
51992"/// used in the conversion.\n"
51993"/// \\returns A 64-bit integer containing the converted value.\n"
51994"static __inline__ long long __DEFAULT_FN_ATTRS\n"
51995"_mm_cvttss_si64(__m128 __a)\n"
51996"{\n"
51997" return __builtin_ia32_cvttss2si64((__v4sf)__a);\n"
51998"}\n"
51999"#endif\n"
52000"\n"
52001"/// Converts two low-order float values in a 128-bit vector of\n"
52002"/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result\n"
52003"/// when it is inexact.\n"
52004"///\n"
52005"/// \\headerfile <x86intrin.h>\n"
52006"///\n"
52007"/// This intrinsic corresponds to the <c> CVTTPS2PI / VTTPS2PI </c>\n"
52008"/// instructions.\n"
52009"///\n"
52010"/// \\param __a\n"
52011"/// A 128-bit vector of [4 x float].\n"
52012"/// \\returns A 64-bit integer vector containing the converted values.\n"
52013"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52014"_mm_cvttps_pi32(__m128 __a)\n"
52015"{\n"
52016" return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);\n"
52017"}\n"
52018"\n"
52019"/// Converts two low-order float values in a 128-bit vector of [4 x\n"
52020"/// float] into a 64-bit vector of [2 x i32], truncating the result when it\n"
52021"/// is inexact.\n"
52022"///\n"
52023"/// \\headerfile <x86intrin.h>\n"
52024"///\n"
52025"/// This intrinsic corresponds to the <c> CVTTPS2PI </c> instruction.\n"
52026"///\n"
52027"/// \\param __a\n"
52028"/// A 128-bit vector of [4 x float].\n"
52029"/// \\returns A 64-bit integer vector containing the converted values.\n"
52030"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52031"_mm_cvtt_ps2pi(__m128 __a)\n"
52032"{\n"
52033" return _mm_cvttps_pi32(__a);\n"
52034"}\n"
52035"\n"
52036"/// Converts a 32-bit signed integer value into a floating point value\n"
52037"/// and writes it to the lower 32 bits of the destination. The remaining\n"
52038"/// higher order elements of the destination vector are copied from the\n"
52039"/// corresponding elements in the first operand.\n"
52040"///\n"
52041"/// \\headerfile <x86intrin.h>\n"
52042"///\n"
52043"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
52044"///\n"
52045"/// \\param __a\n"
52046"/// A 128-bit vector of [4 x float].\n"
52047"/// \\param __b\n"
52048"/// A 32-bit signed integer operand containing the value to be converted.\n"
52049"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52050"/// converted value of the second operand. The upper 96 bits are copied from\n"
52051"/// the upper 96 bits of the first operand.\n"
52052"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52053"_mm_cvtsi32_ss(__m128 __a, int __b)\n"
52054"{\n"
52055" __a[0] = __b;\n"
52056" return __a;\n"
52057"}\n"
52058"\n"
52059"/// Converts a 32-bit signed integer value into a floating point value\n"
52060"/// and writes it to the lower 32 bits of the destination. The remaining\n"
52061"/// higher order elements of the destination are copied from the\n"
52062"/// corresponding elements in the first operand.\n"
52063"///\n"
52064"/// \\headerfile <x86intrin.h>\n"
52065"///\n"
52066"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
52067"///\n"
52068"/// \\param __a\n"
52069"/// A 128-bit vector of [4 x float].\n"
52070"/// \\param __b\n"
52071"/// A 32-bit signed integer operand containing the value to be converted.\n"
52072"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52073"/// converted value of the second operand. The upper 96 bits are copied from\n"
52074"/// the upper 96 bits of the first operand.\n"
52075"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52076"_mm_cvt_si2ss(__m128 __a, int __b)\n"
52077"{\n"
52078" return _mm_cvtsi32_ss(__a, __b);\n"
52079"}\n"
52080"\n"
52081"#ifdef __x86_64__\n"
52082"\n"
52083"/// Converts a 64-bit signed integer value into a floating point value\n"
52084"/// and writes it to the lower 32 bits of the destination. The remaining\n"
52085"/// higher order elements of the destination are copied from the\n"
52086"/// corresponding elements in the first operand.\n"
52087"///\n"
52088"/// \\headerfile <x86intrin.h>\n"
52089"///\n"
52090"/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n"
52091"///\n"
52092"/// \\param __a\n"
52093"/// A 128-bit vector of [4 x float].\n"
52094"/// \\param __b\n"
52095"/// A 64-bit signed integer operand containing the value to be converted.\n"
52096"/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n"
52097"/// converted value of the second operand. The upper 96 bits are copied from\n"
52098"/// the upper 96 bits of the first operand.\n"
52099"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52100"_mm_cvtsi64_ss(__m128 __a, long long __b)\n"
52101"{\n"
52102" __a[0] = __b;\n"
52103" return __a;\n"
52104"}\n"
52105"\n"
52106"#endif\n"
52107"\n"
52108"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
52109"/// floating point values and writes them to the lower 64-bits of the\n"
52110"/// destination. The remaining higher order elements of the destination are\n"
52111"/// copied from the corresponding elements in the first operand.\n"
52112"///\n"
52113"/// \\headerfile <x86intrin.h>\n"
52114"///\n"
52115"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
52116"///\n"
52117"/// \\param __a\n"
52118"/// A 128-bit vector of [4 x float].\n"
52119"/// \\param __b\n"
52120"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
52121"/// and written to the corresponding low-order elements in the destination.\n"
52122"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
52123"/// converted value of the second operand. The upper 64 bits are copied from\n"
52124"/// the upper 64 bits of the first operand.\n"
52125"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
52126"_mm_cvtpi32_ps(__m128 __a, __m64 __b)\n"
52127"{\n"
52128" return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);\n"
52129"}\n"
52130"\n"
52131"/// Converts two elements of a 64-bit vector of [2 x i32] into two\n"
52132"/// floating point values and writes them to the lower 64-bits of the\n"
52133"/// destination. The remaining higher order elements of the destination are\n"
52134"/// copied from the corresponding elements in the first operand.\n"
52135"///\n"
52136"/// \\headerfile <x86intrin.h>\n"
52137"///\n"
52138"/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n"
52139"///\n"
52140"/// \\param __a\n"
52141"/// A 128-bit vector of [4 x float].\n"
52142"/// \\param __b\n"
52143"/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n"
52144"/// and written to the corresponding low-order elements in the destination.\n"
52145"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
52146"/// converted value from the second operand. The upper 64 bits are copied\n"
52147"/// from the upper 64 bits of the first operand.\n"
52148"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
52149"_mm_cvt_pi2ps(__m128 __a, __m64 __b)\n"
52150"{\n"
52151" return _mm_cvtpi32_ps(__a, __b);\n"
52152"}\n"
52153"\n"
52154"/// Extracts a float value contained in the lower 32 bits of a vector of\n"
52155"/// [4 x float].\n"
52156"///\n"
52157"/// \\headerfile <x86intrin.h>\n"
52158"///\n"
52159"/// This intrinsic has no corresponding instruction.\n"
52160"///\n"
52161"/// \\param __a\n"
52162"/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n"
52163"/// used in the extraction.\n"
52164"/// \\returns A 32-bit float containing the extracted value.\n"
52165"static __inline__ float __DEFAULT_FN_ATTRS\n"
52166"_mm_cvtss_f32(__m128 __a)\n"
52167"{\n"
52168" return __a[0];\n"
52169"}\n"
52170"\n"
52171"/// Loads two packed float values from the address \\a __p into the\n"
52172"/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits\n"
52173"/// are copied from the low-order bits of the first operand.\n"
52174"///\n"
52175"/// \\headerfile <x86intrin.h>\n"
52176"///\n"
52177"/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n"
52178"///\n"
52179"/// \\param __a\n"
52180"/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]\n"
52181"/// of the destination.\n"
52182"/// \\param __p\n"
52183"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
52184"/// [127:64] of the destination.\n"
52185"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
52186"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52187"_mm_loadh_pi(__m128 __a, const __m64 *__p)\n"
52188"{\n"
52189" typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));\n"
52190" struct __mm_loadh_pi_struct {\n"
52191" __mm_loadh_pi_v2f32 __u;\n"
52192" } __attribute__((__packed__, __may_alias__));\n"
52193" __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;\n"
52194" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
52195" return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);\n"
52196"}\n"
52197"\n"
52198"/// Loads two packed float values from the address \\a __p into the\n"
52199"/// low-order bits of a 128-bit vector of [4 x float]. The high-order bits\n"
52200"/// are copied from the high-order bits of the first operand.\n"
52201"///\n"
52202"/// \\headerfile <x86intrin.h>\n"
52203"///\n"
52204"/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n"
52205"///\n"
52206"/// \\param __a\n"
52207"/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits\n"
52208"/// [127:64] of the destination.\n"
52209"/// \\param __p\n"
52210"/// A pointer to two packed float values. Bits [63:0] are written to bits\n"
52211"/// [63:0] of the destination.\n"
52212"/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n"
52213"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52214"_mm_loadl_pi(__m128 __a, const __m64 *__p)\n"
52215"{\n"
52216" typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));\n"
52217" struct __mm_loadl_pi_struct {\n"
52218" __mm_loadl_pi_v2f32 __u;\n"
52219" } __attribute__((__packed__, __may_alias__));\n"
52220" __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;\n"
52221" __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n"
52222" return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);\n"
52223"}\n"
52224"\n"
52225"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
52226"/// 32 bits of the vector are initialized with the single-precision\n"
52227"/// floating-point value loaded from a specified memory location. The upper\n"
52228"/// 96 bits are set to zero.\n"
52229"///\n"
52230"/// \\headerfile <x86intrin.h>\n"
52231"///\n"
52232"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
52233"///\n"
52234"/// \\param __p\n"
52235"/// A pointer to a 32-bit memory location containing a single-precision\n"
52236"/// floating-point value.\n"
52237"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
52238"/// lower 32 bits contain the value loaded from the memory location. The\n"
52239"/// upper 96 bits are set to zero.\n"
52240"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52241"_mm_load_ss(const float *__p)\n"
52242"{\n"
52243" struct __mm_load_ss_struct {\n"
52244" float __u;\n"
52245" } __attribute__((__packed__, __may_alias__));\n"
52246" float __u = ((struct __mm_load_ss_struct*)__p)->__u;\n"
52247" return __extension__ (__m128){ __u, 0, 0, 0 };\n"
52248"}\n"
52249"\n"
52250"/// Loads a 32-bit float value and duplicates it to all four vector\n"
52251"/// elements of a 128-bit vector of [4 x float].\n"
52252"///\n"
52253"/// \\headerfile <x86intrin.h>\n"
52254"///\n"
52255"/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>\n"
52256"/// instruction.\n"
52257"///\n"
52258"/// \\param __p\n"
52259"/// A pointer to a float value to be loaded and duplicated.\n"
52260"/// \\returns A 128-bit vector of [4 x float] containing the loaded and\n"
52261"/// duplicated values.\n"
52262"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52263"_mm_load1_ps(const float *__p)\n"
52264"{\n"
52265" struct __mm_load1_ps_struct {\n"
52266" float __u;\n"
52267" } __attribute__((__packed__, __may_alias__));\n"
52268" float __u = ((struct __mm_load1_ps_struct*)__p)->__u;\n"
52269" return __extension__ (__m128){ __u, __u, __u, __u };\n"
52270"}\n"
52271"\n"
52272"#define _mm_load_ps1(p) _mm_load1_ps(p)\n"
52273"\n"
52274"/// Loads a 128-bit floating-point vector of [4 x float] from an aligned\n"
52275"/// memory location.\n"
52276"///\n"
52277"/// \\headerfile <x86intrin.h>\n"
52278"///\n"
52279"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
52280"///\n"
52281"/// \\param __p\n"
52282"/// A pointer to a 128-bit memory location. The address of the memory\n"
52283"/// location has to be 128-bit aligned.\n"
52284"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
52285"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52286"_mm_load_ps(const float *__p)\n"
52287"{\n"
52288" return *(__m128*)__p;\n"
52289"}\n"
52290"\n"
52291"/// Loads a 128-bit floating-point vector of [4 x float] from an\n"
52292"/// unaligned memory location.\n"
52293"///\n"
52294"/// \\headerfile <x86intrin.h>\n"
52295"///\n"
52296"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
52297"///\n"
52298"/// \\param __p\n"
52299"/// A pointer to a 128-bit memory location. The address of the memory\n"
52300"/// location does not have to be aligned.\n"
52301"/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n"
52302"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52303"_mm_loadu_ps(const float *__p)\n"
52304"{\n"
52305" struct __loadu_ps {\n"
52306" __m128 __v;\n"
52307" } __attribute__((__packed__, __may_alias__));\n"
52308" return ((struct __loadu_ps*)__p)->__v;\n"
52309"}\n"
52310"\n"
52311"/// Loads four packed float values, in reverse order, from an aligned\n"
52312"/// memory location to 32-bit elements in a 128-bit vector of [4 x float].\n"
52313"///\n"
52314"/// \\headerfile <x86intrin.h>\n"
52315"///\n"
52316"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
52317"/// instruction.\n"
52318"///\n"
52319"/// \\param __p\n"
52320"/// A pointer to a 128-bit memory location. The address of the memory\n"
52321"/// location has to be 128-bit aligned.\n"
52322"/// \\returns A 128-bit vector of [4 x float] containing the moved values, loaded\n"
52323"/// in reverse order.\n"
52324"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52325"_mm_loadr_ps(const float *__p)\n"
52326"{\n"
52327" __m128 __a = _mm_load_ps(__p);\n"
52328" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
52329"}\n"
52330"\n"
52331"/// Create a 128-bit vector of [4 x float] with undefined values.\n"
52332"///\n"
52333"/// \\headerfile <x86intrin.h>\n"
52334"///\n"
52335"/// This intrinsic has no corresponding instruction.\n"
52336"///\n"
52337"/// \\returns A 128-bit vector of [4 x float] containing undefined values.\n"
52338"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52339"_mm_undefined_ps(void)\n"
52340"{\n"
52341" return (__m128)__builtin_ia32_undef128();\n"
52342"}\n"
52343"\n"
52344"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
52345"/// 32 bits of the vector are initialized with the specified single-precision\n"
52346"/// floating-point value. The upper 96 bits are set to zero.\n"
52347"///\n"
52348"/// \\headerfile <x86intrin.h>\n"
52349"///\n"
52350"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
52351"///\n"
52352"/// \\param __w\n"
52353"/// A single-precision floating-point value used to initialize the lower 32\n"
52354"/// bits of the result.\n"
52355"/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n"
52356"/// lower 32 bits contain the value provided in the source operand. The\n"
52357"/// upper 96 bits are set to zero.\n"
52358"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52359"_mm_set_ss(float __w)\n"
52360"{\n"
52361" return __extension__ (__m128){ __w, 0, 0, 0 };\n"
52362"}\n"
52363"\n"
52364"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
52365"/// of the four single-precision floating-point vector elements set to the\n"
52366"/// specified single-precision floating-point value.\n"
52367"///\n"
52368"/// \\headerfile <x86intrin.h>\n"
52369"///\n"
52370"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
52371"///\n"
52372"/// \\param __w\n"
52373"/// A single-precision floating-point value used to initialize each vector\n"
52374"/// element of the result.\n"
52375"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
52376"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52377"_mm_set1_ps(float __w)\n"
52378"{\n"
52379" return __extension__ (__m128){ __w, __w, __w, __w };\n"
52380"}\n"
52381"\n"
52382"/* Microsoft specific. */\n"
52383"/// Constructs a 128-bit floating-point vector of [4 x float], with each\n"
52384"/// of the four single-precision floating-point vector elements set to the\n"
52385"/// specified single-precision floating-point value.\n"
52386"///\n"
52387"/// \\headerfile <x86intrin.h>\n"
52388"///\n"
52389"/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n"
52390"///\n"
52391"/// \\param __w\n"
52392"/// A single-precision floating-point value used to initialize each vector\n"
52393"/// element of the result.\n"
52394"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
52395"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52396"_mm_set_ps1(float __w)\n"
52397"{\n"
52398" return _mm_set1_ps(__w);\n"
52399"}\n"
52400"\n"
52401"/// Constructs a 128-bit floating-point vector of [4 x float]\n"
52402"/// initialized with the specified single-precision floating-point values.\n"
52403"///\n"
52404"/// \\headerfile <x86intrin.h>\n"
52405"///\n"
52406"/// This intrinsic is a utility function and does not correspond to a specific\n"
52407"/// instruction.\n"
52408"///\n"
52409"/// \\param __z\n"
52410"/// A single-precision floating-point value used to initialize bits [127:96]\n"
52411"/// of the result.\n"
52412"/// \\param __y\n"
52413"/// A single-precision floating-point value used to initialize bits [95:64]\n"
52414"/// of the result.\n"
52415"/// \\param __x\n"
52416"/// A single-precision floating-point value used to initialize bits [63:32]\n"
52417"/// of the result.\n"
52418"/// \\param __w\n"
52419"/// A single-precision floating-point value used to initialize bits [31:0]\n"
52420"/// of the result.\n"
52421"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
52422"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52423"_mm_set_ps(float __z, float __y, float __x, float __w)\n"
52424"{\n"
52425" return __extension__ (__m128){ __w, __x, __y, __z };\n"
52426"}\n"
52427"\n"
52428"/// Constructs a 128-bit floating-point vector of [4 x float],\n"
52429"/// initialized in reverse order with the specified 32-bit single-precision\n"
52430"/// float-point values.\n"
52431"///\n"
52432"/// \\headerfile <x86intrin.h>\n"
52433"///\n"
52434"/// This intrinsic is a utility function and does not correspond to a specific\n"
52435"/// instruction.\n"
52436"///\n"
52437"/// \\param __z\n"
52438"/// A single-precision floating-point value used to initialize bits [31:0]\n"
52439"/// of the result.\n"
52440"/// \\param __y\n"
52441"/// A single-precision floating-point value used to initialize bits [63:32]\n"
52442"/// of the result.\n"
52443"/// \\param __x\n"
52444"/// A single-precision floating-point value used to initialize bits [95:64]\n"
52445"/// of the result.\n"
52446"/// \\param __w\n"
52447"/// A single-precision floating-point value used to initialize bits [127:96]\n"
52448"/// of the result.\n"
52449"/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n"
52450"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52451"_mm_setr_ps(float __z, float __y, float __x, float __w)\n"
52452"{\n"
52453" return __extension__ (__m128){ __z, __y, __x, __w };\n"
52454"}\n"
52455"\n"
52456"/// Constructs a 128-bit floating-point vector of [4 x float] initialized\n"
52457"/// to zero.\n"
52458"///\n"
52459"/// \\headerfile <x86intrin.h>\n"
52460"///\n"
52461"/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n"
52462"///\n"
52463"/// \\returns An initialized 128-bit floating-point vector of [4 x float] with\n"
52464"/// all elements set to zero.\n"
52465"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
52466"_mm_setzero_ps(void)\n"
52467"{\n"
52468" return __extension__ (__m128){ 0, 0, 0, 0 };\n"
52469"}\n"
52470"\n"
52471"/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a\n"
52472"/// memory location.\n"
52473"///\n"
52474"/// \\headerfile <x86intrin.h>\n"
52475"///\n"
52476"/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n"
52477"///\n"
52478"/// \\param __p\n"
52479"/// A pointer to a 64-bit memory location.\n"
52480"/// \\param __a\n"
52481"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
52482"static __inline__ void __DEFAULT_FN_ATTRS\n"
52483"_mm_storeh_pi(__m64 *__p, __m128 __a)\n"
52484"{\n"
52485" __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);\n"
52486"}\n"
52487"\n"
52488"/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a\n"
52489"/// memory location.\n"
52490"///\n"
52491"/// \\headerfile <x86intrin.h>\n"
52492"///\n"
52493"/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n"
52494"///\n"
52495"/// \\param __p\n"
52496"/// A pointer to a memory location that will receive the float values.\n"
52497"/// \\param __a\n"
52498"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
52499"static __inline__ void __DEFAULT_FN_ATTRS\n"
52500"_mm_storel_pi(__m64 *__p, __m128 __a)\n"
52501"{\n"
52502" __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);\n"
52503"}\n"
52504"\n"
52505"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a\n"
52506"/// memory location.\n"
52507"///\n"
52508"/// \\headerfile <x86intrin.h>\n"
52509"///\n"
52510"/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n"
52511"///\n"
52512"/// \\param __p\n"
52513"/// A pointer to a 32-bit memory location.\n"
52514"/// \\param __a\n"
52515"/// A 128-bit vector of [4 x float] containing the value to be stored.\n"
52516"static __inline__ void __DEFAULT_FN_ATTRS\n"
52517"_mm_store_ss(float *__p, __m128 __a)\n"
52518"{\n"
52519" struct __mm_store_ss_struct {\n"
52520" float __u;\n"
52521" } __attribute__((__packed__, __may_alias__));\n"
52522" ((struct __mm_store_ss_struct*)__p)->__u = __a[0];\n"
52523"}\n"
52524"\n"
52525"/// Stores a 128-bit vector of [4 x float] to an unaligned memory\n"
52526"/// location.\n"
52527"///\n"
52528"/// \\headerfile <x86intrin.h>\n"
52529"///\n"
52530"/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n"
52531"///\n"
52532"/// \\param __p\n"
52533"/// A pointer to a 128-bit memory location. The address of the memory\n"
52534"/// location does not have to be aligned.\n"
52535"/// \\param __a\n"
52536"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
52537"static __inline__ void __DEFAULT_FN_ATTRS\n"
52538"_mm_storeu_ps(float *__p, __m128 __a)\n"
52539"{\n"
52540" struct __storeu_ps {\n"
52541" __m128 __v;\n"
52542" } __attribute__((__packed__, __may_alias__));\n"
52543" ((struct __storeu_ps*)__p)->__v = __a;\n"
52544"}\n"
52545"\n"
52546"/// Stores a 128-bit vector of [4 x float] into an aligned memory\n"
52547"/// location.\n"
52548"///\n"
52549"/// \\headerfile <x86intrin.h>\n"
52550"///\n"
52551"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n"
52552"///\n"
52553"/// \\param __p\n"
52554"/// A pointer to a 128-bit memory location. The address of the memory\n"
52555"/// location has to be 16-byte aligned.\n"
52556"/// \\param __a\n"
52557"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
52558"static __inline__ void __DEFAULT_FN_ATTRS\n"
52559"_mm_store_ps(float *__p, __m128 __a)\n"
52560"{\n"
52561" *(__m128*)__p = __a;\n"
52562"}\n"
52563"\n"
52564"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
52565"/// four contiguous elements in an aligned memory location.\n"
52566"///\n"
52567"/// \\headerfile <x86intrin.h>\n"
52568"///\n"
52569"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
52570"/// instruction.\n"
52571"///\n"
52572"/// \\param __p\n"
52573"/// A pointer to a 128-bit memory location.\n"
52574"/// \\param __a\n"
52575"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
52576"/// of the four contiguous elements pointed by \\a __p.\n"
52577"static __inline__ void __DEFAULT_FN_ATTRS\n"
52578"_mm_store1_ps(float *__p, __m128 __a)\n"
52579"{\n"
52580" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);\n"
52581" _mm_store_ps(__p, __a);\n"
52582"}\n"
52583"\n"
52584"/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n"
52585"/// four contiguous elements in an aligned memory location.\n"
52586"///\n"
52587"/// \\headerfile <x86intrin.h>\n"
52588"///\n"
52589"/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n"
52590"/// instruction.\n"
52591"///\n"
52592"/// \\param __p\n"
52593"/// A pointer to a 128-bit memory location.\n"
52594"/// \\param __a\n"
52595"/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n"
52596"/// of the four contiguous elements pointed by \\a __p.\n"
52597"static __inline__ void __DEFAULT_FN_ATTRS\n"
52598"_mm_store_ps1(float *__p, __m128 __a)\n"
52599"{\n"
52600" _mm_store1_ps(__p, __a);\n"
52601"}\n"
52602"\n"
52603"/// Stores float values from a 128-bit vector of [4 x float] to an\n"
52604"/// aligned memory location in reverse order.\n"
52605"///\n"
52606"/// \\headerfile <x86intrin.h>\n"
52607"///\n"
52608"/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n"
52609"/// instruction.\n"
52610"///\n"
52611"/// \\param __p\n"
52612"/// A pointer to a 128-bit memory location. The address of the memory\n"
52613"/// location has to be 128-bit aligned.\n"
52614"/// \\param __a\n"
52615"/// A 128-bit vector of [4 x float] containing the values to be stored.\n"
52616"static __inline__ void __DEFAULT_FN_ATTRS\n"
52617"_mm_storer_ps(float *__p, __m128 __a)\n"
52618"{\n"
52619" __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n"
52620" _mm_store_ps(__p, __a);\n"
52621"}\n"
52622"\n"
52623"#define _MM_HINT_ET0 7\n"
52624"#define _MM_HINT_ET1 6\n"
52625"#define _MM_HINT_T0 3\n"
52626"#define _MM_HINT_T1 2\n"
52627"#define _MM_HINT_T2 1\n"
52628"#define _MM_HINT_NTA 0\n"
52629"\n"
52630"#ifndef _MSC_VER\n"
52631"/* FIXME: We have to #define this because \"sel\" must be a constant integer, and\n"
52632" Sema doesn't do any form of constant propagation yet. */\n"
52633"\n"
52634"/// Loads one cache line of data from the specified address to a location\n"
52635"/// closer to the processor.\n"
52636"///\n"
52637"/// \\headerfile <x86intrin.h>\n"
52638"///\n"
52639"/// \\code\n"
52640"/// void _mm_prefetch(const void * a, const int sel);\n"
52641"/// \\endcode\n"
52642"///\n"
52643"/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.\n"
52644"///\n"
52645"/// \\param a\n"
52646"/// A pointer to a memory location containing a cache line of data.\n"
52647"/// \\param sel\n"
52648"/// A predefined integer constant specifying the type of prefetch\n"
52649"/// operation: \\n\n"
52650"/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The\n"
52651"/// PREFETCHNTA instruction will be generated. \\n\n"
52652"/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will\n"
52653"/// be generated. \\n\n"
52654"/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will\n"
52655"/// be generated. \\n\n"
52656"/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will\n"
52657"/// be generated.\n"
52658"#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \\\n"
52659" ((sel) >> 2) & 1, (sel) & 0x3))\n"
52660"#endif\n"
52661"\n"
52662"/// Stores a 64-bit integer in the specified aligned memory location. To\n"
52663"/// minimize caching, the data is flagged as non-temporal (unlikely to be\n"
52664"/// used again soon).\n"
52665"///\n"
52666"/// \\headerfile <x86intrin.h>\n"
52667"///\n"
52668"/// This intrinsic corresponds to the <c> MOVNTQ </c> instruction.\n"
52669"///\n"
52670"/// \\param __p\n"
52671"/// A pointer to an aligned memory location used to store the register value.\n"
52672"/// \\param __a\n"
52673"/// A 64-bit integer containing the value to be stored.\n"
52674"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
52675"_mm_stream_pi(__m64 *__p, __m64 __a)\n"
52676"{\n"
52677" __builtin_ia32_movntq(__p, __a);\n"
52678"}\n"
52679"\n"
52680"/// Moves packed float values from a 128-bit vector of [4 x float] to a\n"
52681"/// 128-bit aligned memory location. To minimize caching, the data is flagged\n"
52682"/// as non-temporal (unlikely to be used again soon).\n"
52683"///\n"
52684"/// \\headerfile <x86intrin.h>\n"
52685"///\n"
52686"/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n"
52687"///\n"
52688"/// \\param __p\n"
52689"/// A pointer to a 128-bit aligned memory location that will receive the\n"
52690"/// single-precision floating-point values.\n"
52691"/// \\param __a\n"
52692"/// A 128-bit vector of [4 x float] containing the values to be moved.\n"
52693"static __inline__ void __DEFAULT_FN_ATTRS\n"
52694"_mm_stream_ps(float *__p, __m128 __a)\n"
52695"{\n"
52696" __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);\n"
52697"}\n"
52698"\n"
52699"#if defined(__cplusplus)\n"
52700"extern \"C\" {\n"
52701"#endif\n"
52702"\n"
52703"/// Forces strong memory ordering (serialization) between store\n"
52704"/// instructions preceding this instruction and store instructions following\n"
52705"/// this instruction, ensuring the system completes all previous stores\n"
52706"/// before executing subsequent stores.\n"
52707"///\n"
52708"/// \\headerfile <x86intrin.h>\n"
52709"///\n"
52710"/// This intrinsic corresponds to the <c> SFENCE </c> instruction.\n"
52711"///\n"
52712"void _mm_sfence(void);\n"
52713"\n"
52714"#if defined(__cplusplus)\n"
52715"} // extern \"C\"\n"
52716"#endif\n"
52717"\n"
52718"/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and\n"
52719"/// returns it, as specified by the immediate integer operand.\n"
52720"///\n"
52721"/// \\headerfile <x86intrin.h>\n"
52722"///\n"
52723"/// \\code\n"
52724"/// int _mm_extract_pi16(__m64 a, int n);\n"
52725"/// \\endcode\n"
52726"///\n"
52727"/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n"
52728"///\n"
52729"/// \\param a\n"
52730"/// A 64-bit vector of [4 x i16].\n"
52731"/// \\param n\n"
52732"/// An immediate integer operand that determines which bits are extracted: \\n\n"
52733"/// 0: Bits [15:0] are copied to the destination. \\n\n"
52734"/// 1: Bits [31:16] are copied to the destination. \\n\n"
52735"/// 2: Bits [47:32] are copied to the destination. \\n\n"
52736"/// 3: Bits [63:48] are copied to the destination.\n"
52737"/// \\returns A 16-bit integer containing the extracted 16 bits of packed data.\n"
52738"#define _mm_extract_pi16(a, n) \\\n"
52739" (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)\n"
52740"\n"
52741"/// Copies data from the 64-bit vector of [4 x i16] to the destination,\n"
52742"/// and inserts the lower 16-bits of an integer operand at the 16-bit offset\n"
52743"/// specified by the immediate operand \\a n.\n"
52744"///\n"
52745"/// \\headerfile <x86intrin.h>\n"
52746"///\n"
52747"/// \\code\n"
52748"/// __m64 _mm_insert_pi16(__m64 a, int d, int n);\n"
52749"/// \\endcode\n"
52750"///\n"
52751"/// This intrinsic corresponds to the <c> PINSRW </c> instruction.\n"
52752"///\n"
52753"/// \\param a\n"
52754"/// A 64-bit vector of [4 x i16].\n"
52755"/// \\param d\n"
52756"/// An integer. The lower 16-bit value from this operand is written to the\n"
52757"/// destination at the offset specified by operand \\a n.\n"
52758"/// \\param n\n"
52759"/// An immediate integer operant that determines which the bits to be used\n"
52760"/// in the destination. \\n\n"
52761"/// 0: Bits [15:0] are copied to the destination. \\n\n"
52762"/// 1: Bits [31:16] are copied to the destination. \\n\n"
52763"/// 2: Bits [47:32] are copied to the destination. \\n\n"
52764"/// 3: Bits [63:48] are copied to the destination. \\n\n"
52765"/// The remaining bits in the destination are copied from the corresponding\n"
52766"/// bits in operand \\a a.\n"
52767"/// \\returns A 64-bit integer vector containing the copied packed data from the\n"
52768"/// operands.\n"
52769"#define _mm_insert_pi16(a, d, n) \\\n"
52770" (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)\n"
52771"\n"
52772"/// Compares each of the corresponding packed 16-bit integer values of\n"
52773"/// the 64-bit integer vectors, and writes the greater value to the\n"
52774"/// corresponding bits in the destination.\n"
52775"///\n"
52776"/// \\headerfile <x86intrin.h>\n"
52777"///\n"
52778"/// This intrinsic corresponds to the <c> PMAXSW </c> instruction.\n"
52779"///\n"
52780"/// \\param __a\n"
52781"/// A 64-bit integer vector containing one of the source operands.\n"
52782"/// \\param __b\n"
52783"/// A 64-bit integer vector containing one of the source operands.\n"
52784"/// \\returns A 64-bit integer vector containing the comparison results.\n"
52785"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52786"_mm_max_pi16(__m64 __a, __m64 __b)\n"
52787"{\n"
52788" return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);\n"
52789"}\n"
52790"\n"
52791"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
52792"/// values of the 64-bit integer vectors, and writes the greater value to the\n"
52793"/// corresponding bits in the destination.\n"
52794"///\n"
52795"/// \\headerfile <x86intrin.h>\n"
52796"///\n"
52797"/// This intrinsic corresponds to the <c> PMAXUB </c> instruction.\n"
52798"///\n"
52799"/// \\param __a\n"
52800"/// A 64-bit integer vector containing one of the source operands.\n"
52801"/// \\param __b\n"
52802"/// A 64-bit integer vector containing one of the source operands.\n"
52803"/// \\returns A 64-bit integer vector containing the comparison results.\n"
52804"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52805"_mm_max_pu8(__m64 __a, __m64 __b)\n"
52806"{\n"
52807" return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);\n"
52808"}\n"
52809"\n"
52810"/// Compares each of the corresponding packed 16-bit integer values of\n"
52811"/// the 64-bit integer vectors, and writes the lesser value to the\n"
52812"/// corresponding bits in the destination.\n"
52813"///\n"
52814"/// \\headerfile <x86intrin.h>\n"
52815"///\n"
52816"/// This intrinsic corresponds to the <c> PMINSW </c> instruction.\n"
52817"///\n"
52818"/// \\param __a\n"
52819"/// A 64-bit integer vector containing one of the source operands.\n"
52820"/// \\param __b\n"
52821"/// A 64-bit integer vector containing one of the source operands.\n"
52822"/// \\returns A 64-bit integer vector containing the comparison results.\n"
52823"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52824"_mm_min_pi16(__m64 __a, __m64 __b)\n"
52825"{\n"
52826" return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);\n"
52827"}\n"
52828"\n"
52829"/// Compares each of the corresponding packed 8-bit unsigned integer\n"
52830"/// values of the 64-bit integer vectors, and writes the lesser value to the\n"
52831"/// corresponding bits in the destination.\n"
52832"///\n"
52833"/// \\headerfile <x86intrin.h>\n"
52834"///\n"
52835"/// This intrinsic corresponds to the <c> PMINUB </c> instruction.\n"
52836"///\n"
52837"/// \\param __a\n"
52838"/// A 64-bit integer vector containing one of the source operands.\n"
52839"/// \\param __b\n"
52840"/// A 64-bit integer vector containing one of the source operands.\n"
52841"/// \\returns A 64-bit integer vector containing the comparison results.\n"
52842"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52843"_mm_min_pu8(__m64 __a, __m64 __b)\n"
52844"{\n"
52845" return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);\n"
52846"}\n"
52847"\n"
52848"/// Takes the most significant bit from each 8-bit element in a 64-bit\n"
52849"/// integer vector to create an 8-bit mask value. Zero-extends the value to\n"
52850"/// 32-bit integer and writes it to the destination.\n"
52851"///\n"
52852"/// \\headerfile <x86intrin.h>\n"
52853"///\n"
52854"/// This intrinsic corresponds to the <c> PMOVMSKB </c> instruction.\n"
52855"///\n"
52856"/// \\param __a\n"
52857"/// A 64-bit integer vector containing the values with bits to be extracted.\n"
52858"/// \\returns The most significant bit from each 8-bit element in \\a __a,\n"
52859"/// written to bits [7:0].\n"
52860"static __inline__ int __DEFAULT_FN_ATTRS_MMX\n"
52861"_mm_movemask_pi8(__m64 __a)\n"
52862"{\n"
52863" return __builtin_ia32_pmovmskb((__v8qi)__a);\n"
52864"}\n"
52865"\n"
52866"/// Multiplies packed 16-bit unsigned integer values and writes the\n"
52867"/// high-order 16 bits of each 32-bit product to the corresponding bits in\n"
52868"/// the destination.\n"
52869"///\n"
52870"/// \\headerfile <x86intrin.h>\n"
52871"///\n"
52872"/// This intrinsic corresponds to the <c> PMULHUW </c> instruction.\n"
52873"///\n"
52874"/// \\param __a\n"
52875"/// A 64-bit integer vector containing one of the source operands.\n"
52876"/// \\param __b\n"
52877"/// A 64-bit integer vector containing one of the source operands.\n"
52878"/// \\returns A 64-bit integer vector containing the products of both operands.\n"
52879"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52880"_mm_mulhi_pu16(__m64 __a, __m64 __b)\n"
52881"{\n"
52882" return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);\n"
52883"}\n"
52884"\n"
52885"/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the\n"
52886"/// destination, as specified by the immediate value operand.\n"
52887"///\n"
52888"/// \\headerfile <x86intrin.h>\n"
52889"///\n"
52890"/// \\code\n"
52891"/// __m64 _mm_shuffle_pi16(__m64 a, const int n);\n"
52892"/// \\endcode\n"
52893"///\n"
52894"/// This intrinsic corresponds to the <c> PSHUFW </c> instruction.\n"
52895"///\n"
52896"/// \\param a\n"
52897"/// A 64-bit integer vector containing the values to be shuffled.\n"
52898"/// \\param n\n"
52899"/// An immediate value containing an 8-bit value specifying which elements to\n"
52900"/// copy from \\a a. The destinations within the 64-bit destination are\n"
52901"/// assigned values as follows: \\n\n"
52902"/// Bits [1:0] are used to assign values to bits [15:0] in the\n"
52903"/// destination. \\n\n"
52904"/// Bits [3:2] are used to assign values to bits [31:16] in the\n"
52905"/// destination. \\n\n"
52906"/// Bits [5:4] are used to assign values to bits [47:32] in the\n"
52907"/// destination. \\n\n"
52908"/// Bits [7:6] are used to assign values to bits [63:48] in the\n"
52909"/// destination. \\n\n"
52910"/// Bit value assignments: \\n\n"
52911"/// 00: assigned from bits [15:0] of \\a a. \\n\n"
52912"/// 01: assigned from bits [31:16] of \\a a. \\n\n"
52913"/// 10: assigned from bits [47:32] of \\a a. \\n\n"
52914"/// 11: assigned from bits [63:48] of \\a a.\n"
52915"/// \\returns A 64-bit integer vector containing the shuffled values.\n"
52916"#define _mm_shuffle_pi16(a, n) \\\n"
52917" (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))\n"
52918"\n"
52919"/// Conditionally copies the values from each 8-bit element in the first\n"
52920"/// 64-bit integer vector operand to the specified memory location, as\n"
52921"/// specified by the most significant bit in the corresponding element in the\n"
52922"/// second 64-bit integer vector operand.\n"
52923"///\n"
52924"/// To minimize caching, the data is flagged as non-temporal\n"
52925"/// (unlikely to be used again soon).\n"
52926"///\n"
52927"/// \\headerfile <x86intrin.h>\n"
52928"///\n"
52929"/// This intrinsic corresponds to the <c> MASKMOVQ </c> instruction.\n"
52930"///\n"
52931"/// \\param __d\n"
52932"/// A 64-bit integer vector containing the values with elements to be copied.\n"
52933"/// \\param __n\n"
52934"/// A 64-bit integer vector operand. The most significant bit from each 8-bit\n"
52935"/// element determines whether the corresponding element in operand \\a __d\n"
52936"/// is copied. If the most significant bit of a given element is 1, the\n"
52937"/// corresponding element in operand \\a __d is copied.\n"
52938"/// \\param __p\n"
52939"/// A pointer to a 64-bit memory location that will receive the conditionally\n"
52940"/// copied integer values. The address of the memory location does not have\n"
52941"/// to be aligned.\n"
52942"static __inline__ void __DEFAULT_FN_ATTRS_MMX\n"
52943"_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)\n"
52944"{\n"
52945" __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);\n"
52946"}\n"
52947"\n"
52948"/// Computes the rounded averages of the packed unsigned 8-bit integer\n"
52949"/// values and writes the averages to the corresponding bits in the\n"
52950"/// destination.\n"
52951"///\n"
52952"/// \\headerfile <x86intrin.h>\n"
52953"///\n"
52954"/// This intrinsic corresponds to the <c> PAVGB </c> instruction.\n"
52955"///\n"
52956"/// \\param __a\n"
52957"/// A 64-bit integer vector containing one of the source operands.\n"
52958"/// \\param __b\n"
52959"/// A 64-bit integer vector containing one of the source operands.\n"
52960"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
52961"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52962"_mm_avg_pu8(__m64 __a, __m64 __b)\n"
52963"{\n"
52964" return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);\n"
52965"}\n"
52966"\n"
52967"/// Computes the rounded averages of the packed unsigned 16-bit integer\n"
52968"/// values and writes the averages to the corresponding bits in the\n"
52969"/// destination.\n"
52970"///\n"
52971"/// \\headerfile <x86intrin.h>\n"
52972"///\n"
52973"/// This intrinsic corresponds to the <c> PAVGW </c> instruction.\n"
52974"///\n"
52975"/// \\param __a\n"
52976"/// A 64-bit integer vector containing one of the source operands.\n"
52977"/// \\param __b\n"
52978"/// A 64-bit integer vector containing one of the source operands.\n"
52979"/// \\returns A 64-bit integer vector containing the averages of both operands.\n"
52980"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
52981"_mm_avg_pu16(__m64 __a, __m64 __b)\n"
52982"{\n"
52983" return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);\n"
52984"}\n"
52985"\n"
52986"/// Subtracts the corresponding 8-bit unsigned integer values of the two\n"
52987"/// 64-bit vector operands and computes the absolute value for each of the\n"
52988"/// difference. Then sum of the 8 absolute differences is written to the\n"
52989"/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.\n"
52990"///\n"
52991"/// \\headerfile <x86intrin.h>\n"
52992"///\n"
52993"/// This intrinsic corresponds to the <c> PSADBW </c> instruction.\n"
52994"///\n"
52995"/// \\param __a\n"
52996"/// A 64-bit integer vector containing one of the source operands.\n"
52997"/// \\param __b\n"
52998"/// A 64-bit integer vector containing one of the source operands.\n"
52999"/// \\returns A 64-bit integer vector whose lower 16 bits contain the sums of the\n"
53000"/// sets of absolute differences between both operands. The upper bits are\n"
53001"/// cleared.\n"
53002"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53003"_mm_sad_pu8(__m64 __a, __m64 __b)\n"
53004"{\n"
53005" return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);\n"
53006"}\n"
53007"\n"
53008"#if defined(__cplusplus)\n"
53009"extern \"C\" {\n"
53010"#endif\n"
53011"\n"
53012"/// Returns the contents of the MXCSR register as a 32-bit unsigned\n"
53013"/// integer value.\n"
53014"///\n"
53015"/// There are several groups of macros associated with this\n"
53016"/// intrinsic, including:\n"
53017"/// <ul>\n"
53018"/// <li>\n"
53019"/// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
53020"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
53021"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
53022"/// _MM_GET_EXCEPTION_STATE().\n"
53023"/// </li>\n"
53024"/// <li>\n"
53025"/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
53026"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
53027"/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().\n"
53028"/// </li>\n"
53029"/// <li>\n"
53030"/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
53031"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
53032"/// _MM_GET_ROUNDING_MODE().\n"
53033"/// </li>\n"
53034"/// <li>\n"
53035"/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
53036"/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().\n"
53037"/// </li>\n"
53038"/// <li>\n"
53039"/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
53040"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
53041"/// _MM_GET_DENORMALS_ZERO_MODE().\n"
53042"/// </li>\n"
53043"/// </ul>\n"
53044"///\n"
53045"/// For example, the following expression checks if an overflow exception has\n"
53046"/// occurred:\n"
53047"/// \\code\n"
53048"/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )\n"
53049"/// \\endcode\n"
53050"///\n"
53051"/// The following expression gets the current rounding mode:\n"
53052"/// \\code\n"
53053"/// _MM_GET_ROUNDING_MODE()\n"
53054"/// \\endcode\n"
53055"///\n"
53056"/// \\headerfile <x86intrin.h>\n"
53057"///\n"
53058"/// This intrinsic corresponds to the <c> VSTMXCSR / STMXCSR </c> instruction.\n"
53059"///\n"
53060"/// \\returns A 32-bit unsigned integer containing the contents of the MXCSR\n"
53061"/// register.\n"
53062"unsigned int _mm_getcsr(void);\n"
53063"\n"
53064"/// Sets the MXCSR register with the 32-bit unsigned integer value.\n"
53065"///\n"
53066"/// There are several groups of macros associated with this intrinsic,\n"
53067"/// including:\n"
53068"/// <ul>\n"
53069"/// <li>\n"
53070"/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n"
53071"/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n"
53072"/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n"
53073"/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.\n"
53074"/// </li>\n"
53075"/// <li>\n"
53076"/// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n"
53077"/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n"
53078"/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one\n"
53079"/// of these macros.\n"
53080"/// </li>\n"
53081"/// <li>\n"
53082"/// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n"
53083"/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n"
53084"/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.\n"
53085"/// </li>\n"
53086"/// <li>\n"
53087"/// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n"
53088"/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is\n"
53089"/// one of these macros.\n"
53090"/// </li>\n"
53091"/// <li>\n"
53092"/// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n"
53093"/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n"
53094"/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.\n"
53095"/// </li>\n"
53096"/// </ul>\n"
53097"///\n"
53098"/// For example, the following expression causes subsequent floating-point\n"
53099"/// operations to round up:\n"
53100"/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)\n"
53101"///\n"
53102"/// The following example sets the DAZ and FTZ flags:\n"
53103"/// \\code\n"
53104"/// void setFlags() {\n"
53105"/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n"
53106"/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n"
53107"/// }\n"
53108"/// \\endcode\n"
53109"///\n"
53110"/// \\headerfile <x86intrin.h>\n"
53111"///\n"
53112"/// This intrinsic corresponds to the <c> VLDMXCSR / LDMXCSR </c> instruction.\n"
53113"///\n"
53114"/// \\param __i\n"
53115"/// A 32-bit unsigned integer value to be written to the MXCSR register.\n"
53116"void _mm_setcsr(unsigned int __i);\n"
53117"\n"
53118"#if defined(__cplusplus)\n"
53119"} // extern \"C\"\n"
53120"#endif\n"
53121"\n"
53122"/// Selects 4 float values from the 128-bit operands of [4 x float], as\n"
53123"/// specified by the immediate value operand.\n"
53124"///\n"
53125"/// \\headerfile <x86intrin.h>\n"
53126"///\n"
53127"/// \\code\n"
53128"/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);\n"
53129"/// \\endcode\n"
53130"///\n"
53131"/// This intrinsic corresponds to the <c> VSHUFPS / SHUFPS </c> instruction.\n"
53132"///\n"
53133"/// \\param a\n"
53134"/// A 128-bit vector of [4 x float].\n"
53135"/// \\param b\n"
53136"/// A 128-bit vector of [4 x float].\n"
53137"/// \\param mask\n"
53138"/// An immediate value containing an 8-bit value specifying which elements to\n"
53139"/// copy from \\a a and \\a b. \\n\n"
53140"/// Bits [3:0] specify the values copied from operand \\a a. \\n\n"
53141"/// Bits [7:4] specify the values copied from operand \\a b. \\n\n"
53142"/// The destinations within the 128-bit destination are assigned values as\n"
53143"/// follows: \\n\n"
53144"/// Bits [1:0] are used to assign values to bits [31:0] in the\n"
53145"/// destination. \\n\n"
53146"/// Bits [3:2] are used to assign values to bits [63:32] in the\n"
53147"/// destination. \\n\n"
53148"/// Bits [5:4] are used to assign values to bits [95:64] in the\n"
53149"/// destination. \\n\n"
53150"/// Bits [7:6] are used to assign values to bits [127:96] in the\n"
53151"/// destination. \\n\n"
53152"/// Bit value assignments: \\n\n"
53153"/// 00: Bits [31:0] copied from the specified operand. \\n\n"
53154"/// 01: Bits [63:32] copied from the specified operand. \\n\n"
53155"/// 10: Bits [95:64] copied from the specified operand. \\n\n"
53156"/// 11: Bits [127:96] copied from the specified operand.\n"
53157"/// \\returns A 128-bit vector of [4 x float] containing the shuffled values.\n"
53158"#define _mm_shuffle_ps(a, b, mask) \\\n"
53159" (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \\\n"
53160" (int)(mask))\n"
53161"\n"
53162"/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n"
53163"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
53164"///\n"
53165"/// \\headerfile <x86intrin.h>\n"
53166"///\n"
53167"/// This intrinsic corresponds to the <c> VUNPCKHPS / UNPCKHPS </c> instruction.\n"
53168"///\n"
53169"/// \\param __a\n"
53170"/// A 128-bit vector of [4 x float]. \\n\n"
53171"/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n"
53172"/// Bits [127:96] are written to bits [95:64] of the destination.\n"
53173"/// \\param __b\n"
53174"/// A 128-bit vector of [4 x float].\n"
53175"/// Bits [95:64] are written to bits [63:32] of the destination. \\n\n"
53176"/// Bits [127:96] are written to bits [127:96] of the destination.\n"
53177"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
53178"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53179"_mm_unpackhi_ps(__m128 __a, __m128 __b)\n"
53180"{\n"
53181" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);\n"
53182"}\n"
53183"\n"
53184"/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n"
53185"/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n"
53186"///\n"
53187"/// \\headerfile <x86intrin.h>\n"
53188"///\n"
53189"/// This intrinsic corresponds to the <c> VUNPCKLPS / UNPCKLPS </c> instruction.\n"
53190"///\n"
53191"/// \\param __a\n"
53192"/// A 128-bit vector of [4 x float]. \\n\n"
53193"/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n"
53194"/// Bits [63:32] are written to bits [95:64] of the destination.\n"
53195"/// \\param __b\n"
53196"/// A 128-bit vector of [4 x float]. \\n\n"
53197"/// Bits [31:0] are written to bits [63:32] of the destination. \\n\n"
53198"/// Bits [63:32] are written to bits [127:96] of the destination.\n"
53199"/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n"
53200"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53201"_mm_unpacklo_ps(__m128 __a, __m128 __b)\n"
53202"{\n"
53203" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);\n"
53204"}\n"
53205"\n"
53206"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
53207"/// 32 bits are set to the lower 32 bits of the second parameter. The upper\n"
53208"/// 96 bits are set to the upper 96 bits of the first parameter.\n"
53209"///\n"
53210"/// \\headerfile <x86intrin.h>\n"
53211"///\n"
53212"/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>\n"
53213"/// instruction.\n"
53214"///\n"
53215"/// \\param __a\n"
53216"/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are\n"
53217"/// written to the upper 96 bits of the result.\n"
53218"/// \\param __b\n"
53219"/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are\n"
53220"/// written to the lower 32 bits of the result.\n"
53221"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
53222"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53223"_mm_move_ss(__m128 __a, __m128 __b)\n"
53224"{\n"
53225" __a[0] = __b[0];\n"
53226" return __a;\n"
53227"}\n"
53228"\n"
53229"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
53230"/// 64 bits are set to the upper 64 bits of the second parameter. The upper\n"
53231"/// 64 bits are set to the upper 64 bits of the first parameter.\n"
53232"///\n"
53233"/// \\headerfile <x86intrin.h>\n"
53234"///\n"
53235"/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n"
53236"///\n"
53237"/// \\param __a\n"
53238"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
53239"/// written to the upper 64 bits of the result.\n"
53240"/// \\param __b\n"
53241"/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n"
53242"/// written to the lower 64 bits of the result.\n"
53243"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
53244"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53245"_mm_movehl_ps(__m128 __a, __m128 __b)\n"
53246"{\n"
53247" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);\n"
53248"}\n"
53249"\n"
53250"/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n"
53251"/// 64 bits are set to the lower 64 bits of the first parameter. The upper\n"
53252"/// 64 bits are set to the lower 64 bits of the second parameter.\n"
53253"///\n"
53254"/// \\headerfile <x86intrin.h>\n"
53255"///\n"
53256"/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n"
53257"///\n"
53258"/// \\param __a\n"
53259"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
53260"/// written to the lower 64 bits of the result.\n"
53261"/// \\param __b\n"
53262"/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n"
53263"/// written to the upper 64 bits of the result.\n"
53264"/// \\returns A 128-bit floating-point vector of [4 x float].\n"
53265"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
53266"_mm_movelh_ps(__m128 __a, __m128 __b)\n"
53267"{\n"
53268" return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);\n"
53269"}\n"
53270"\n"
53271"/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x\n"
53272"/// float].\n"
53273"///\n"
53274"/// \\headerfile <x86intrin.h>\n"
53275"///\n"
53276"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
53277"///\n"
53278"/// \\param __a\n"
53279"/// A 64-bit vector of [4 x i16]. The elements of the destination are copied\n"
53280"/// from the corresponding elements in this operand.\n"
53281"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
53282"/// values from the operand.\n"
53283"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
53284"_mm_cvtpi16_ps(__m64 __a)\n"
53285"{\n"
53286" __m64 __b, __c;\n"
53287" __m128 __r;\n"
53288"\n"
53289" __b = _mm_setzero_si64();\n"
53290" __b = _mm_cmpgt_pi16(__b, __a);\n"
53291" __c = _mm_unpackhi_pi16(__a, __b);\n"
53292" __r = _mm_setzero_ps();\n"
53293" __r = _mm_cvtpi32_ps(__r, __c);\n"
53294" __r = _mm_movelh_ps(__r, __r);\n"
53295" __c = _mm_unpacklo_pi16(__a, __b);\n"
53296" __r = _mm_cvtpi32_ps(__r, __c);\n"
53297"\n"
53298" return __r;\n"
53299"}\n"
53300"\n"
53301"/// Converts a 64-bit vector of 16-bit unsigned integer values into a\n"
53302"/// 128-bit vector of [4 x float].\n"
53303"///\n"
53304"/// \\headerfile <x86intrin.h>\n"
53305"///\n"
53306"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
53307"///\n"
53308"/// \\param __a\n"
53309"/// A 64-bit vector of 16-bit unsigned integer values. The elements of the\n"
53310"/// destination are copied from the corresponding elements in this operand.\n"
53311"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
53312"/// values from the operand.\n"
53313"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
53314"_mm_cvtpu16_ps(__m64 __a)\n"
53315"{\n"
53316" __m64 __b, __c;\n"
53317" __m128 __r;\n"
53318"\n"
53319" __b = _mm_setzero_si64();\n"
53320" __c = _mm_unpackhi_pi16(__a, __b);\n"
53321" __r = _mm_setzero_ps();\n"
53322" __r = _mm_cvtpi32_ps(__r, __c);\n"
53323" __r = _mm_movelh_ps(__r, __r);\n"
53324" __c = _mm_unpacklo_pi16(__a, __b);\n"
53325" __r = _mm_cvtpi32_ps(__r, __c);\n"
53326"\n"
53327" return __r;\n"
53328"}\n"
53329"\n"
53330"/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]\n"
53331"/// into a 128-bit vector of [4 x float].\n"
53332"///\n"
53333"/// \\headerfile <x86intrin.h>\n"
53334"///\n"
53335"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
53336"///\n"
53337"/// \\param __a\n"
53338"/// A 64-bit vector of [8 x i8]. The elements of the destination are copied\n"
53339"/// from the corresponding lower 4 elements in this operand.\n"
53340"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
53341"/// values from the operand.\n"
53342"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
53343"_mm_cvtpi8_ps(__m64 __a)\n"
53344"{\n"
53345" __m64 __b;\n"
53346"\n"
53347" __b = _mm_setzero_si64();\n"
53348" __b = _mm_cmpgt_pi8(__b, __a);\n"
53349" __b = _mm_unpacklo_pi8(__a, __b);\n"
53350"\n"
53351" return _mm_cvtpi16_ps(__b);\n"
53352"}\n"
53353"\n"
53354"/// Converts the lower four unsigned 8-bit integer values from a 64-bit\n"
53355"/// vector of [8 x u8] into a 128-bit vector of [4 x float].\n"
53356"///\n"
53357"/// \\headerfile <x86intrin.h>\n"
53358"///\n"
53359"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
53360"///\n"
53361"/// \\param __a\n"
53362"/// A 64-bit vector of unsigned 8-bit integer values. The elements of the\n"
53363"/// destination are copied from the corresponding lower 4 elements in this\n"
53364"/// operand.\n"
53365"/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n"
53366"/// values from the source operand.\n"
53367"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
53368"_mm_cvtpu8_ps(__m64 __a)\n"
53369"{\n"
53370" __m64 __b;\n"
53371"\n"
53372" __b = _mm_setzero_si64();\n"
53373" __b = _mm_unpacklo_pi8(__a, __b);\n"
53374"\n"
53375" return _mm_cvtpi16_ps(__b);\n"
53376"}\n"
53377"\n"
53378"/// Converts the two 32-bit signed integer values from each 64-bit vector\n"
53379"/// operand of [2 x i32] into a 128-bit vector of [4 x float].\n"
53380"///\n"
53381"/// \\headerfile <x86intrin.h>\n"
53382"///\n"
53383"/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n"
53384"///\n"
53385"/// \\param __a\n"
53386"/// A 64-bit vector of [2 x i32]. The lower elements of the destination are\n"
53387"/// copied from the elements in this operand.\n"
53388"/// \\param __b\n"
53389"/// A 64-bit vector of [2 x i32]. The upper elements of the destination are\n"
53390"/// copied from the elements in this operand.\n"
53391"/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n"
53392"/// copied and converted values from the first operand. The upper 64 bits\n"
53393"/// contain the copied and converted values from the second operand.\n"
53394"static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n"
53395"_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)\n"
53396"{\n"
53397" __m128 __c;\n"
53398"\n"
53399" __c = _mm_setzero_ps();\n"
53400" __c = _mm_cvtpi32_ps(__c, __b);\n"
53401" __c = _mm_movelh_ps(__c, __c);\n"
53402"\n"
53403" return _mm_cvtpi32_ps(__c, __a);\n"
53404"}\n"
53405"\n"
53406"/// Converts each single-precision floating-point element of a 128-bit\n"
53407"/// floating-point vector of [4 x float] into a 16-bit signed integer, and\n"
53408"/// packs the results into a 64-bit integer vector of [4 x i16].\n"
53409"///\n"
53410"/// If the floating-point element is NaN or infinity, or if the\n"
53411"/// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,\n"
53412"/// it is converted to 0x8000. Otherwise if the floating-point element is\n"
53413"/// greater than 0x7FFF, it is converted to 0x7FFF.\n"
53414"///\n"
53415"/// \\headerfile <x86intrin.h>\n"
53416"///\n"
53417"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
53418"///\n"
53419"/// \\param __a\n"
53420"/// A 128-bit floating-point vector of [4 x float].\n"
53421"/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n"
53422"/// values.\n"
53423"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53424"_mm_cvtps_pi16(__m128 __a)\n"
53425"{\n"
53426" __m64 __b, __c;\n"
53427"\n"
53428" __b = _mm_cvtps_pi32(__a);\n"
53429" __a = _mm_movehl_ps(__a, __a);\n"
53430" __c = _mm_cvtps_pi32(__a);\n"
53431"\n"
53432" return _mm_packs_pi32(__b, __c);\n"
53433"}\n"
53434"\n"
53435"/// Converts each single-precision floating-point element of a 128-bit\n"
53436"/// floating-point vector of [4 x float] into an 8-bit signed integer, and\n"
53437"/// packs the results into the lower 32 bits of a 64-bit integer vector of\n"
53438"/// [8 x i8]. The upper 32 bits of the vector are set to 0.\n"
53439"///\n"
53440"/// If the floating-point element is NaN or infinity, or if the\n"
53441"/// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it\n"
53442"/// is converted to 0x80. Otherwise if the floating-point element is greater\n"
53443"/// than 0x7F, it is converted to 0x7F.\n"
53444"///\n"
53445"/// \\headerfile <x86intrin.h>\n"
53446"///\n"
53447"/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n"
53448"///\n"
53449"/// \\param __a\n"
53450"/// 128-bit floating-point vector of [4 x float].\n"
53451"/// \\returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the\n"
53452"/// converted values and the uppper 32 bits are set to zero.\n"
53453"static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n"
53454"_mm_cvtps_pi8(__m128 __a)\n"
53455"{\n"
53456" __m64 __b, __c;\n"
53457"\n"
53458" __b = _mm_cvtps_pi16(__a);\n"
53459" __c = _mm_setzero_si64();\n"
53460"\n"
53461" return _mm_packs_pi16(__b, __c);\n"
53462"}\n"
53463"\n"
53464"/// Extracts the sign bits from each single-precision floating-point\n"
53465"/// element of a 128-bit floating-point vector of [4 x float] and returns the\n"
53466"/// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set\n"
53467"/// to zero.\n"
53468"///\n"
53469"/// \\headerfile <x86intrin.h>\n"
53470"///\n"
53471"/// This intrinsic corresponds to the <c> VMOVMSKPS / MOVMSKPS </c> instruction.\n"
53472"///\n"
53473"/// \\param __a\n"
53474"/// A 128-bit floating-point vector of [4 x float].\n"
53475"/// \\returns A 32-bit integer value. Bits [3:0] contain the sign bits from each\n"
53476"/// single-precision floating-point element of the parameter. Bits [31:4] are\n"
53477"/// set to zero.\n"
53478"static __inline__ int __DEFAULT_FN_ATTRS\n"
53479"_mm_movemask_ps(__m128 __a)\n"
53480"{\n"
53481" return __builtin_ia32_movmskps((__v4sf)__a);\n"
53482"}\n"
53483"\n"
53484"\n"
53485"#define _MM_ALIGN16 __attribute__((aligned(16)))\n"
53486"\n"
53487"#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n"
53488"\n"
53489"#define _MM_EXCEPT_INVALID (0x0001)\n"
53490"#define _MM_EXCEPT_DENORM (0x0002)\n"
53491"#define _MM_EXCEPT_DIV_ZERO (0x0004)\n"
53492"#define _MM_EXCEPT_OVERFLOW (0x0008)\n"
53493"#define _MM_EXCEPT_UNDERFLOW (0x0010)\n"
53494"#define _MM_EXCEPT_INEXACT (0x0020)\n"
53495"#define _MM_EXCEPT_MASK (0x003f)\n"
53496"\n"
53497"#define _MM_MASK_INVALID (0x0080)\n"
53498"#define _MM_MASK_DENORM (0x0100)\n"
53499"#define _MM_MASK_DIV_ZERO (0x0200)\n"
53500"#define _MM_MASK_OVERFLOW (0x0400)\n"
53501"#define _MM_MASK_UNDERFLOW (0x0800)\n"
53502"#define _MM_MASK_INEXACT (0x1000)\n"
53503"#define _MM_MASK_MASK (0x1f80)\n"
53504"\n"
53505"#define _MM_ROUND_NEAREST (0x0000)\n"
53506"#define _MM_ROUND_DOWN (0x2000)\n"
53507"#define _MM_ROUND_UP (0x4000)\n"
53508"#define _MM_ROUND_TOWARD_ZERO (0x6000)\n"
53509"#define _MM_ROUND_MASK (0x6000)\n"
53510"\n"
53511"#define _MM_FLUSH_ZERO_MASK (0x8000)\n"
53512"#define _MM_FLUSH_ZERO_ON (0x8000)\n"
53513"#define _MM_FLUSH_ZERO_OFF (0x0000)\n"
53514"\n"
53515"#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)\n"
53516"#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)\n"
53517"#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)\n"
53518"#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)\n"
53519"\n"
53520"#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))\n"
53521"#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))\n"
53522"#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))\n"
53523"#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))\n"
53524"\n"
53525"#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n"
53526"do { \\\n"
53527" __m128 tmp3, tmp2, tmp1, tmp0; \\\n"
53528" tmp0 = _mm_unpacklo_ps((row0), (row1)); \\\n"
53529" tmp2 = _mm_unpacklo_ps((row2), (row3)); \\\n"
53530" tmp1 = _mm_unpackhi_ps((row0), (row1)); \\\n"
53531" tmp3 = _mm_unpackhi_ps((row2), (row3)); \\\n"
53532" (row0) = _mm_movelh_ps(tmp0, tmp2); \\\n"
53533" (row1) = _mm_movehl_ps(tmp2, tmp0); \\\n"
53534" (row2) = _mm_movelh_ps(tmp1, tmp3); \\\n"
53535" (row3) = _mm_movehl_ps(tmp3, tmp1); \\\n"
53536"} while (0)\n"
53537"\n"
53538"/* Aliases for compatibility. */\n"
53539"#define _m_pextrw _mm_extract_pi16\n"
53540"#define _m_pinsrw _mm_insert_pi16\n"
53541"#define _m_pmaxsw _mm_max_pi16\n"
53542"#define _m_pmaxub _mm_max_pu8\n"
53543"#define _m_pminsw _mm_min_pi16\n"
53544"#define _m_pminub _mm_min_pu8\n"
53545"#define _m_pmovmskb _mm_movemask_pi8\n"
53546"#define _m_pmulhuw _mm_mulhi_pu16\n"
53547"#define _m_pshufw _mm_shuffle_pi16\n"
53548"#define _m_maskmovq _mm_maskmove_si64\n"
53549"#define _m_pavgb _mm_avg_pu8\n"
53550"#define _m_pavgw _mm_avg_pu16\n"
53551"#define _m_psadbw _mm_sad_pu8\n"
53552"#define _m_ _mm_\n"
53553"#define _m_ _mm_\n"
53554"\n"
53555"#undef __DEFAULT_FN_ATTRS\n"
53556"#undef __DEFAULT_FN_ATTRS_MMX\n"
53557"\n"
53558"/* Ugly hack for backwards-compatibility (compatible with gcc) */\n"
53559"#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)\n"
53560"#include <emmintrin.h>\n"
53561"#endif\n"
53562"\n"
53563"#endif /* __XMMINTRIN_H */\n"
53564"" } ,
53565 { "/builtins/xopintrin.h" , "/*===---- xopintrin.h - XOP intrinsics -------------------------------------===\n"
53566" *\n"
53567" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
53568" * of this software and associated documentation files (the \"Software\"), to deal\n"
53569" * in the Software without restriction, including without limitation the rights\n"
53570" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
53571" * copies of the Software, and to permit persons to whom the Software is\n"
53572" * furnished to do so, subject to the following conditions:\n"
53573" *\n"
53574" * The above copyright notice and this permission notice shall be included in\n"
53575" * all copies or substantial portions of the Software.\n"
53576" *\n"
53577" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
53578" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
53579" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
53580" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
53581" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
53582" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
53583" * THE SOFTWARE.\n"
53584" *\n"
53585" *===-----------------------------------------------------------------------===\n"
53586" */\n"
53587"\n"
53588"#ifndef __X86INTRIN_H\n"
53589"#error \"Never use <xopintrin.h> directly; include <x86intrin.h> instead.\"\n"
53590"#endif\n"
53591"\n"
53592"#ifndef __XOPINTRIN_H\n"
53593"#define __XOPINTRIN_H\n"
53594"\n"
53595"#include <fma4intrin.h>\n"
53596"\n"
53597"/* Define the default attributes for the functions in this file. */\n"
53598"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(128)))\n"
53599"#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(256)))\n"
53600"\n"
53601"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53602"_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53603"{\n"
53604" return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
53605"}\n"
53606"\n"
53607"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53608"_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53609"{\n"
53610" return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n"
53611"}\n"
53612"\n"
53613"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53614"_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53615"{\n"
53616" return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
53617"}\n"
53618"\n"
53619"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53620"_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53621"{\n"
53622" return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
53623"}\n"
53624"\n"
53625"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53626"_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53627"{\n"
53628" return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
53629"}\n"
53630"\n"
53631"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53632"_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53633"{\n"
53634" return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n"
53635"}\n"
53636"\n"
53637"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53638"_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53639"{\n"
53640" return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
53641"}\n"
53642"\n"
53643"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53644"_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53645"{\n"
53646" return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
53647"}\n"
53648"\n"
53649"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53650"_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53651"{\n"
53652" return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
53653"}\n"
53654"\n"
53655"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53656"_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)\n"
53657"{\n"
53658" return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n"
53659"}\n"
53660"\n"
53661"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53662"_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53663"{\n"
53664" return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
53665"}\n"
53666"\n"
53667"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53668"_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)\n"
53669"{\n"
53670" return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n"
53671"}\n"
53672"\n"
53673"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53674"_mm_haddw_epi8(__m128i __A)\n"
53675"{\n"
53676" return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);\n"
53677"}\n"
53678"\n"
53679"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53680"_mm_haddd_epi8(__m128i __A)\n"
53681"{\n"
53682" return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);\n"
53683"}\n"
53684"\n"
53685"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53686"_mm_haddq_epi8(__m128i __A)\n"
53687"{\n"
53688" return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);\n"
53689"}\n"
53690"\n"
53691"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53692"_mm_haddd_epi16(__m128i __A)\n"
53693"{\n"
53694" return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);\n"
53695"}\n"
53696"\n"
53697"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53698"_mm_haddq_epi16(__m128i __A)\n"
53699"{\n"
53700" return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);\n"
53701"}\n"
53702"\n"
53703"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53704"_mm_haddq_epi32(__m128i __A)\n"
53705"{\n"
53706" return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);\n"
53707"}\n"
53708"\n"
53709"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53710"_mm_haddw_epu8(__m128i __A)\n"
53711"{\n"
53712" return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);\n"
53713"}\n"
53714"\n"
53715"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53716"_mm_haddd_epu8(__m128i __A)\n"
53717"{\n"
53718" return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);\n"
53719"}\n"
53720"\n"
53721"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53722"_mm_haddq_epu8(__m128i __A)\n"
53723"{\n"
53724" return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);\n"
53725"}\n"
53726"\n"
53727"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53728"_mm_haddd_epu16(__m128i __A)\n"
53729"{\n"
53730" return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);\n"
53731"}\n"
53732"\n"
53733"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53734"_mm_haddq_epu16(__m128i __A)\n"
53735"{\n"
53736" return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);\n"
53737"}\n"
53738"\n"
53739"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53740"_mm_haddq_epu32(__m128i __A)\n"
53741"{\n"
53742" return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);\n"
53743"}\n"
53744"\n"
53745"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53746"_mm_hsubw_epi8(__m128i __A)\n"
53747"{\n"
53748" return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);\n"
53749"}\n"
53750"\n"
53751"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53752"_mm_hsubd_epi16(__m128i __A)\n"
53753"{\n"
53754" return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);\n"
53755"}\n"
53756"\n"
53757"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53758"_mm_hsubq_epi32(__m128i __A)\n"
53759"{\n"
53760" return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);\n"
53761"}\n"
53762"\n"
53763"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53764"_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)\n"
53765"{\n"
53766" return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));\n"
53767"}\n"
53768"\n"
53769"static __inline__ __m256i __DEFAULT_FN_ATTRS256\n"
53770"_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)\n"
53771"{\n"
53772" return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));\n"
53773"}\n"
53774"\n"
53775"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53776"_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)\n"
53777"{\n"
53778" return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);\n"
53779"}\n"
53780"\n"
53781"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53782"_mm_rot_epi8(__m128i __A, __m128i __B)\n"
53783"{\n"
53784" return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);\n"
53785"}\n"
53786"\n"
53787"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53788"_mm_rot_epi16(__m128i __A, __m128i __B)\n"
53789"{\n"
53790" return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);\n"
53791"}\n"
53792"\n"
53793"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53794"_mm_rot_epi32(__m128i __A, __m128i __B)\n"
53795"{\n"
53796" return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);\n"
53797"}\n"
53798"\n"
53799"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53800"_mm_rot_epi64(__m128i __A, __m128i __B)\n"
53801"{\n"
53802" return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);\n"
53803"}\n"
53804"\n"
53805"#define _mm_roti_epi8(A, N) \\\n"
53806" (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))\n"
53807"\n"
53808"#define _mm_roti_epi16(A, N) \\\n"
53809" (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))\n"
53810"\n"
53811"#define _mm_roti_epi32(A, N) \\\n"
53812" (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))\n"
53813"\n"
53814"#define _mm_roti_epi64(A, N) \\\n"
53815" (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))\n"
53816"\n"
53817"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53818"_mm_shl_epi8(__m128i __A, __m128i __B)\n"
53819"{\n"
53820" return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);\n"
53821"}\n"
53822"\n"
53823"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53824"_mm_shl_epi16(__m128i __A, __m128i __B)\n"
53825"{\n"
53826" return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);\n"
53827"}\n"
53828"\n"
53829"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53830"_mm_shl_epi32(__m128i __A, __m128i __B)\n"
53831"{\n"
53832" return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);\n"
53833"}\n"
53834"\n"
53835"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53836"_mm_shl_epi64(__m128i __A, __m128i __B)\n"
53837"{\n"
53838" return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);\n"
53839"}\n"
53840"\n"
53841"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53842"_mm_sha_epi8(__m128i __A, __m128i __B)\n"
53843"{\n"
53844" return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);\n"
53845"}\n"
53846"\n"
53847"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53848"_mm_sha_epi16(__m128i __A, __m128i __B)\n"
53849"{\n"
53850" return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);\n"
53851"}\n"
53852"\n"
53853"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53854"_mm_sha_epi32(__m128i __A, __m128i __B)\n"
53855"{\n"
53856" return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);\n"
53857"}\n"
53858"\n"
53859"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53860"_mm_sha_epi64(__m128i __A, __m128i __B)\n"
53861"{\n"
53862" return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);\n"
53863"}\n"
53864"\n"
53865"#define _mm_com_epu8(A, B, N) \\\n"
53866" (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \\\n"
53867" (__v16qi)(__m128i)(B), (N))\n"
53868"\n"
53869"#define _mm_com_epu16(A, B, N) \\\n"
53870" (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \\\n"
53871" (__v8hi)(__m128i)(B), (N))\n"
53872"\n"
53873"#define _mm_com_epu32(A, B, N) \\\n"
53874" (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \\\n"
53875" (__v4si)(__m128i)(B), (N))\n"
53876"\n"
53877"#define _mm_com_epu64(A, B, N) \\\n"
53878" (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \\\n"
53879" (__v2di)(__m128i)(B), (N))\n"
53880"\n"
53881"#define _mm_com_epi8(A, B, N) \\\n"
53882" (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \\\n"
53883" (__v16qi)(__m128i)(B), (N))\n"
53884"\n"
53885"#define _mm_com_epi16(A, B, N) \\\n"
53886" (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \\\n"
53887" (__v8hi)(__m128i)(B), (N))\n"
53888"\n"
53889"#define _mm_com_epi32(A, B, N) \\\n"
53890" (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \\\n"
53891" (__v4si)(__m128i)(B), (N))\n"
53892"\n"
53893"#define _mm_com_epi64(A, B, N) \\\n"
53894" (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \\\n"
53895" (__v2di)(__m128i)(B), (N))\n"
53896"\n"
53897"#define _MM_PCOMCTRL_LT 0\n"
53898"#define _MM_PCOMCTRL_LE 1\n"
53899"#define _MM_PCOMCTRL_GT 2\n"
53900"#define _MM_PCOMCTRL_GE 3\n"
53901"#define _MM_PCOMCTRL_EQ 4\n"
53902"#define _MM_PCOMCTRL_NEQ 5\n"
53903"#define _MM_PCOMCTRL_FALSE 6\n"
53904"#define _MM_PCOMCTRL_TRUE 7\n"
53905"\n"
53906"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53907"_mm_comlt_epu8(__m128i __A, __m128i __B)\n"
53908"{\n"
53909" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);\n"
53910"}\n"
53911"\n"
53912"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53913"_mm_comle_epu8(__m128i __A, __m128i __B)\n"
53914"{\n"
53915" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);\n"
53916"}\n"
53917"\n"
53918"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53919"_mm_comgt_epu8(__m128i __A, __m128i __B)\n"
53920"{\n"
53921" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);\n"
53922"}\n"
53923"\n"
53924"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53925"_mm_comge_epu8(__m128i __A, __m128i __B)\n"
53926"{\n"
53927" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);\n"
53928"}\n"
53929"\n"
53930"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53931"_mm_comeq_epu8(__m128i __A, __m128i __B)\n"
53932"{\n"
53933" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);\n"
53934"}\n"
53935"\n"
53936"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53937"_mm_comneq_epu8(__m128i __A, __m128i __B)\n"
53938"{\n"
53939" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
53940"}\n"
53941"\n"
53942"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53943"_mm_comfalse_epu8(__m128i __A, __m128i __B)\n"
53944"{\n"
53945" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
53946"}\n"
53947"\n"
53948"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53949"_mm_comtrue_epu8(__m128i __A, __m128i __B)\n"
53950"{\n"
53951" return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
53952"}\n"
53953"\n"
53954"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53955"_mm_comlt_epu16(__m128i __A, __m128i __B)\n"
53956"{\n"
53957" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);\n"
53958"}\n"
53959"\n"
53960"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53961"_mm_comle_epu16(__m128i __A, __m128i __B)\n"
53962"{\n"
53963" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);\n"
53964"}\n"
53965"\n"
53966"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53967"_mm_comgt_epu16(__m128i __A, __m128i __B)\n"
53968"{\n"
53969" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);\n"
53970"}\n"
53971"\n"
53972"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53973"_mm_comge_epu16(__m128i __A, __m128i __B)\n"
53974"{\n"
53975" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);\n"
53976"}\n"
53977"\n"
53978"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53979"_mm_comeq_epu16(__m128i __A, __m128i __B)\n"
53980"{\n"
53981" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);\n"
53982"}\n"
53983"\n"
53984"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53985"_mm_comneq_epu16(__m128i __A, __m128i __B)\n"
53986"{\n"
53987" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
53988"}\n"
53989"\n"
53990"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53991"_mm_comfalse_epu16(__m128i __A, __m128i __B)\n"
53992"{\n"
53993" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
53994"}\n"
53995"\n"
53996"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
53997"_mm_comtrue_epu16(__m128i __A, __m128i __B)\n"
53998"{\n"
53999" return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54000"}\n"
54001"\n"
54002"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54003"_mm_comlt_epu32(__m128i __A, __m128i __B)\n"
54004"{\n"
54005" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);\n"
54006"}\n"
54007"\n"
54008"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54009"_mm_comle_epu32(__m128i __A, __m128i __B)\n"
54010"{\n"
54011" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);\n"
54012"}\n"
54013"\n"
54014"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54015"_mm_comgt_epu32(__m128i __A, __m128i __B)\n"
54016"{\n"
54017" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);\n"
54018"}\n"
54019"\n"
54020"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54021"_mm_comge_epu32(__m128i __A, __m128i __B)\n"
54022"{\n"
54023" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);\n"
54024"}\n"
54025"\n"
54026"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54027"_mm_comeq_epu32(__m128i __A, __m128i __B)\n"
54028"{\n"
54029" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);\n"
54030"}\n"
54031"\n"
54032"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54033"_mm_comneq_epu32(__m128i __A, __m128i __B)\n"
54034"{\n"
54035" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54036"}\n"
54037"\n"
54038"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54039"_mm_comfalse_epu32(__m128i __A, __m128i __B)\n"
54040"{\n"
54041" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54042"}\n"
54043"\n"
54044"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54045"_mm_comtrue_epu32(__m128i __A, __m128i __B)\n"
54046"{\n"
54047" return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54048"}\n"
54049"\n"
54050"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54051"_mm_comlt_epu64(__m128i __A, __m128i __B)\n"
54052"{\n"
54053" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);\n"
54054"}\n"
54055"\n"
54056"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54057"_mm_comle_epu64(__m128i __A, __m128i __B)\n"
54058"{\n"
54059" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);\n"
54060"}\n"
54061"\n"
54062"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54063"_mm_comgt_epu64(__m128i __A, __m128i __B)\n"
54064"{\n"
54065" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);\n"
54066"}\n"
54067"\n"
54068"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54069"_mm_comge_epu64(__m128i __A, __m128i __B)\n"
54070"{\n"
54071" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);\n"
54072"}\n"
54073"\n"
54074"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54075"_mm_comeq_epu64(__m128i __A, __m128i __B)\n"
54076"{\n"
54077" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);\n"
54078"}\n"
54079"\n"
54080"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54081"_mm_comneq_epu64(__m128i __A, __m128i __B)\n"
54082"{\n"
54083" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54084"}\n"
54085"\n"
54086"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54087"_mm_comfalse_epu64(__m128i __A, __m128i __B)\n"
54088"{\n"
54089" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54090"}\n"
54091"\n"
54092"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54093"_mm_comtrue_epu64(__m128i __A, __m128i __B)\n"
54094"{\n"
54095" return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54096"}\n"
54097"\n"
54098"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54099"_mm_comlt_epi8(__m128i __A, __m128i __B)\n"
54100"{\n"
54101" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);\n"
54102"}\n"
54103"\n"
54104"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54105"_mm_comle_epi8(__m128i __A, __m128i __B)\n"
54106"{\n"
54107" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);\n"
54108"}\n"
54109"\n"
54110"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54111"_mm_comgt_epi8(__m128i __A, __m128i __B)\n"
54112"{\n"
54113" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);\n"
54114"}\n"
54115"\n"
54116"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54117"_mm_comge_epi8(__m128i __A, __m128i __B)\n"
54118"{\n"
54119" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);\n"
54120"}\n"
54121"\n"
54122"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54123"_mm_comeq_epi8(__m128i __A, __m128i __B)\n"
54124"{\n"
54125" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);\n"
54126"}\n"
54127"\n"
54128"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54129"_mm_comneq_epi8(__m128i __A, __m128i __B)\n"
54130"{\n"
54131" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54132"}\n"
54133"\n"
54134"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54135"_mm_comfalse_epi8(__m128i __A, __m128i __B)\n"
54136"{\n"
54137" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54138"}\n"
54139"\n"
54140"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54141"_mm_comtrue_epi8(__m128i __A, __m128i __B)\n"
54142"{\n"
54143" return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54144"}\n"
54145"\n"
54146"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54147"_mm_comlt_epi16(__m128i __A, __m128i __B)\n"
54148"{\n"
54149" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);\n"
54150"}\n"
54151"\n"
54152"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54153"_mm_comle_epi16(__m128i __A, __m128i __B)\n"
54154"{\n"
54155" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);\n"
54156"}\n"
54157"\n"
54158"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54159"_mm_comgt_epi16(__m128i __A, __m128i __B)\n"
54160"{\n"
54161" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);\n"
54162"}\n"
54163"\n"
54164"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54165"_mm_comge_epi16(__m128i __A, __m128i __B)\n"
54166"{\n"
54167" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);\n"
54168"}\n"
54169"\n"
54170"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54171"_mm_comeq_epi16(__m128i __A, __m128i __B)\n"
54172"{\n"
54173" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);\n"
54174"}\n"
54175"\n"
54176"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54177"_mm_comneq_epi16(__m128i __A, __m128i __B)\n"
54178"{\n"
54179" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54180"}\n"
54181"\n"
54182"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54183"_mm_comfalse_epi16(__m128i __A, __m128i __B)\n"
54184"{\n"
54185" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54186"}\n"
54187"\n"
54188"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54189"_mm_comtrue_epi16(__m128i __A, __m128i __B)\n"
54190"{\n"
54191" return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54192"}\n"
54193"\n"
54194"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54195"_mm_comlt_epi32(__m128i __A, __m128i __B)\n"
54196"{\n"
54197" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);\n"
54198"}\n"
54199"\n"
54200"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54201"_mm_comle_epi32(__m128i __A, __m128i __B)\n"
54202"{\n"
54203" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);\n"
54204"}\n"
54205"\n"
54206"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54207"_mm_comgt_epi32(__m128i __A, __m128i __B)\n"
54208"{\n"
54209" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);\n"
54210"}\n"
54211"\n"
54212"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54213"_mm_comge_epi32(__m128i __A, __m128i __B)\n"
54214"{\n"
54215" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);\n"
54216"}\n"
54217"\n"
54218"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54219"_mm_comeq_epi32(__m128i __A, __m128i __B)\n"
54220"{\n"
54221" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);\n"
54222"}\n"
54223"\n"
54224"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54225"_mm_comneq_epi32(__m128i __A, __m128i __B)\n"
54226"{\n"
54227" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54228"}\n"
54229"\n"
54230"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54231"_mm_comfalse_epi32(__m128i __A, __m128i __B)\n"
54232"{\n"
54233" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54234"}\n"
54235"\n"
54236"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54237"_mm_comtrue_epi32(__m128i __A, __m128i __B)\n"
54238"{\n"
54239" return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54240"}\n"
54241"\n"
54242"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54243"_mm_comlt_epi64(__m128i __A, __m128i __B)\n"
54244"{\n"
54245" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);\n"
54246"}\n"
54247"\n"
54248"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54249"_mm_comle_epi64(__m128i __A, __m128i __B)\n"
54250"{\n"
54251" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);\n"
54252"}\n"
54253"\n"
54254"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54255"_mm_comgt_epi64(__m128i __A, __m128i __B)\n"
54256"{\n"
54257" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);\n"
54258"}\n"
54259"\n"
54260"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54261"_mm_comge_epi64(__m128i __A, __m128i __B)\n"
54262"{\n"
54263" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);\n"
54264"}\n"
54265"\n"
54266"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54267"_mm_comeq_epi64(__m128i __A, __m128i __B)\n"
54268"{\n"
54269" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);\n"
54270"}\n"
54271"\n"
54272"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54273"_mm_comneq_epi64(__m128i __A, __m128i __B)\n"
54274"{\n"
54275" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);\n"
54276"}\n"
54277"\n"
54278"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54279"_mm_comfalse_epi64(__m128i __A, __m128i __B)\n"
54280"{\n"
54281" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);\n"
54282"}\n"
54283"\n"
54284"static __inline__ __m128i __DEFAULT_FN_ATTRS\n"
54285"_mm_comtrue_epi64(__m128i __A, __m128i __B)\n"
54286"{\n"
54287" return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);\n"
54288"}\n"
54289"\n"
54290"#define _mm_permute2_pd(X, Y, C, I) \\\n"
54291" (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \\\n"
54292" (__v2df)(__m128d)(Y), \\\n"
54293" (__v2di)(__m128i)(C), (I))\n"
54294"\n"
54295"#define _mm256_permute2_pd(X, Y, C, I) \\\n"
54296" (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \\\n"
54297" (__v4df)(__m256d)(Y), \\\n"
54298" (__v4di)(__m256i)(C), (I))\n"
54299"\n"
54300"#define _mm_permute2_ps(X, Y, C, I) \\\n"
54301" (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \\\n"
54302" (__v4si)(__m128i)(C), (I))\n"
54303"\n"
54304"#define _mm256_permute2_ps(X, Y, C, I) \\\n"
54305" (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \\\n"
54306" (__v8sf)(__m256)(Y), \\\n"
54307" (__v8si)(__m256i)(C), (I))\n"
54308"\n"
54309"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54310"_mm_frcz_ss(__m128 __A)\n"
54311"{\n"
54312" return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);\n"
54313"}\n"
54314"\n"
54315"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
54316"_mm_frcz_sd(__m128d __A)\n"
54317"{\n"
54318" return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);\n"
54319"}\n"
54320"\n"
54321"static __inline__ __m128 __DEFAULT_FN_ATTRS\n"
54322"_mm_frcz_ps(__m128 __A)\n"
54323"{\n"
54324" return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);\n"
54325"}\n"
54326"\n"
54327"static __inline__ __m128d __DEFAULT_FN_ATTRS\n"
54328"_mm_frcz_pd(__m128d __A)\n"
54329"{\n"
54330" return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);\n"
54331"}\n"
54332"\n"
54333"static __inline__ __m256 __DEFAULT_FN_ATTRS256\n"
54334"_mm256_frcz_ps(__m256 __A)\n"
54335"{\n"
54336" return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);\n"
54337"}\n"
54338"\n"
54339"static __inline__ __m256d __DEFAULT_FN_ATTRS256\n"
54340"_mm256_frcz_pd(__m256d __A)\n"
54341"{\n"
54342" return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);\n"
54343"}\n"
54344"\n"
54345"#undef __DEFAULT_FN_ATTRS\n"
54346"#undef __DEFAULT_FN_ATTRS256\n"
54347"\n"
54348"#endif /* __XOPINTRIN_H */\n"
54349"" } ,
54350 { "/builtins/xsavecintrin.h" , "/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===\n"
54351" *\n"
54352" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
54353" * of this software and associated documentation files (the \"Software\"), to deal\n"
54354" * in the Software without restriction, including without limitation the rights\n"
54355" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
54356" * copies of the Software, and to permit persons to whom the Software is\n"
54357" * furnished to do so, subject to the following conditions:\n"
54358" *\n"
54359" * The above copyright notice and this permission notice shall be included in\n"
54360" * all copies or substantial portions of the Software.\n"
54361" *\n"
54362" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54363" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
54364" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
54365" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54366" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
54367" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
54368" * THE SOFTWARE.\n"
54369" *\n"
54370" *===-----------------------------------------------------------------------===\n"
54371" */\n"
54372"\n"
54373"#ifndef __IMMINTRIN_H\n"
54374"#error \"Never use <xsavecintrin.h> directly; include <immintrin.h> instead.\"\n"
54375"#endif\n"
54376"\n"
54377"#ifndef __XSAVECINTRIN_H\n"
54378"#define __XSAVECINTRIN_H\n"
54379"\n"
54380"/* Define the default attributes for the functions in this file. */\n"
54381"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsavec\")))\n"
54382"\n"
54383"static __inline__ void __DEFAULT_FN_ATTRS\n"
54384"_xsavec(void *__p, unsigned long long __m) {\n"
54385" __builtin_ia32_xsavec(__p, __m);\n"
54386"}\n"
54387"\n"
54388"#ifdef __x86_64__\n"
54389"static __inline__ void __DEFAULT_FN_ATTRS\n"
54390"_xsavec64(void *__p, unsigned long long __m) {\n"
54391" __builtin_ia32_xsavec64(__p, __m);\n"
54392"}\n"
54393"#endif\n"
54394"\n"
54395"#undef __DEFAULT_FN_ATTRS\n"
54396"\n"
54397"#endif\n"
54398"" } ,
54399 { "/builtins/xsaveintrin.h" , "/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===\n"
54400" *\n"
54401" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
54402" * of this software and associated documentation files (the \"Software\"), to deal\n"
54403" * in the Software without restriction, including without limitation the rights\n"
54404" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
54405" * copies of the Software, and to permit persons to whom the Software is\n"
54406" * furnished to do so, subject to the following conditions:\n"
54407" *\n"
54408" * The above copyright notice and this permission notice shall be included in\n"
54409" * all copies or substantial portions of the Software.\n"
54410" *\n"
54411" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54412" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
54413" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
54414" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54415" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
54416" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
54417" * THE SOFTWARE.\n"
54418" *\n"
54419" *===-----------------------------------------------------------------------===\n"
54420" */\n"
54421"\n"
54422"#ifndef __IMMINTRIN_H\n"
54423"#error \"Never use <xsaveintrin.h> directly; include <immintrin.h> instead.\"\n"
54424"#endif\n"
54425"\n"
54426"#ifndef __XSAVEINTRIN_H\n"
54427"#define __XSAVEINTRIN_H\n"
54428"\n"
54429"/* Define the default attributes for the functions in this file. */\n"
54430"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsave\")))\n"
54431"\n"
54432"static __inline__ void __DEFAULT_FN_ATTRS\n"
54433"_xsave(void *__p, unsigned long long __m) {\n"
54434" __builtin_ia32_xsave(__p, __m);\n"
54435"}\n"
54436"\n"
54437"static __inline__ void __DEFAULT_FN_ATTRS\n"
54438"_xrstor(void *__p, unsigned long long __m) {\n"
54439" __builtin_ia32_xrstor(__p, __m);\n"
54440"}\n"
54441"\n"
54442"#ifdef __x86_64__\n"
54443"static __inline__ void __DEFAULT_FN_ATTRS\n"
54444"_xsave64(void *__p, unsigned long long __m) {\n"
54445" __builtin_ia32_xsave64(__p, __m);\n"
54446"}\n"
54447"\n"
54448"static __inline__ void __DEFAULT_FN_ATTRS\n"
54449"_xrstor64(void *__p, unsigned long long __m) {\n"
54450" __builtin_ia32_xrstor64(__p, __m);\n"
54451"}\n"
54452"#endif\n"
54453"\n"
54454"#undef __DEFAULT_FN_ATTRS\n"
54455"\n"
54456"#endif\n"
54457"" } ,
54458 { "/builtins/xsaveoptintrin.h" , "/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===\n"
54459" *\n"
54460" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
54461" * of this software and associated documentation files (the \"Software\"), to deal\n"
54462" * in the Software without restriction, including without limitation the rights\n"
54463" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
54464" * copies of the Software, and to permit persons to whom the Software is\n"
54465" * furnished to do so, subject to the following conditions:\n"
54466" *\n"
54467" * The above copyright notice and this permission notice shall be included in\n"
54468" * all copies or substantial portions of the Software.\n"
54469" *\n"
54470" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54471" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
54472" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
54473" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54474" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
54475" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
54476" * THE SOFTWARE.\n"
54477" *\n"
54478" *===-----------------------------------------------------------------------===\n"
54479" */\n"
54480"\n"
54481"#ifndef __IMMINTRIN_H\n"
54482"#error \"Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead.\"\n"
54483"#endif\n"
54484"\n"
54485"#ifndef __XSAVEOPTINTRIN_H\n"
54486"#define __XSAVEOPTINTRIN_H\n"
54487"\n"
54488"/* Define the default attributes for the functions in this file. */\n"
54489"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaveopt\")))\n"
54490"\n"
54491"static __inline__ void __DEFAULT_FN_ATTRS\n"
54492"_xsaveopt(void *__p, unsigned long long __m) {\n"
54493" __builtin_ia32_xsaveopt(__p, __m);\n"
54494"}\n"
54495"\n"
54496"#ifdef __x86_64__\n"
54497"static __inline__ void __DEFAULT_FN_ATTRS\n"
54498"_xsaveopt64(void *__p, unsigned long long __m) {\n"
54499" __builtin_ia32_xsaveopt64(__p, __m);\n"
54500"}\n"
54501"#endif\n"
54502"\n"
54503"#undef __DEFAULT_FN_ATTRS\n"
54504"\n"
54505"#endif\n"
54506"" } ,
54507 { "/builtins/xsavesintrin.h" , "/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------===\n"
54508" *\n"
54509" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
54510" * of this software and associated documentation files (the \"Software\"), to deal\n"
54511" * in the Software without restriction, including without limitation the rights\n"
54512" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
54513" * copies of the Software, and to permit persons to whom the Software is\n"
54514" * furnished to do so, subject to the following conditions:\n"
54515" *\n"
54516" * The above copyright notice and this permission notice shall be included in\n"
54517" * all copies or substantial portions of the Software.\n"
54518" *\n"
54519" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54520" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
54521" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
54522" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54523" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
54524" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
54525" * THE SOFTWARE.\n"
54526" *\n"
54527" *===-----------------------------------------------------------------------===\n"
54528" */\n"
54529"\n"
54530"#ifndef __IMMINTRIN_H\n"
54531"#error \"Never use <xsavesintrin.h> directly; include <immintrin.h> instead.\"\n"
54532"#endif\n"
54533"\n"
54534"#ifndef __XSAVESINTRIN_H\n"
54535"#define __XSAVESINTRIN_H\n"
54536"\n"
54537"/* Define the default attributes for the functions in this file. */\n"
54538"#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaves\")))\n"
54539"\n"
54540"static __inline__ void __DEFAULT_FN_ATTRS\n"
54541"_xsaves(void *__p, unsigned long long __m) {\n"
54542" __builtin_ia32_xsaves(__p, __m);\n"
54543"}\n"
54544"\n"
54545"static __inline__ void __DEFAULT_FN_ATTRS\n"
54546"_xrstors(void *__p, unsigned long long __m) {\n"
54547" __builtin_ia32_xrstors(__p, __m);\n"
54548"}\n"
54549"\n"
54550"#ifdef __x86_64__\n"
54551"static __inline__ void __DEFAULT_FN_ATTRS\n"
54552"_xrstors64(void *__p, unsigned long long __m) {\n"
54553" __builtin_ia32_xrstors64(__p, __m);\n"
54554"}\n"
54555"\n"
54556"static __inline__ void __DEFAULT_FN_ATTRS\n"
54557"_xsaves64(void *__p, unsigned long long __m) {\n"
54558" __builtin_ia32_xsaves64(__p, __m);\n"
54559"}\n"
54560"#endif\n"
54561"\n"
54562"#undef __DEFAULT_FN_ATTRS\n"
54563"\n"
54564"#endif\n"
54565"" } ,
54566 { "/builtins/xtestintrin.h" , "/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------===\n"
54567" *\n"
54568" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
54569" * of this software and associated documentation files (the \"Software\"), to deal\n"
54570" * in the Software without restriction, including without limitation the rights\n"
54571" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
54572" * copies of the Software, and to permit persons to whom the Software is\n"
54573" * furnished to do so, subject to the following conditions:\n"
54574" *\n"
54575" * The above copyright notice and this permission notice shall be included in\n"
54576" * all copies or substantial portions of the Software.\n"
54577" *\n"
54578" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
54579" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
54580" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
54581" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
54582" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
54583" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n"
54584" * THE SOFTWARE.\n"
54585" *\n"
54586" *===-----------------------------------------------------------------------===\n"
54587" */\n"
54588"\n"
54589"#ifndef __IMMINTRIN_H\n"
54590"#error \"Never use <xtestintrin.h> directly; include <immintrin.h> instead.\"\n"
54591"#endif\n"
54592"\n"
54593"#ifndef __XTESTINTRIN_H\n"
54594"#define __XTESTINTRIN_H\n"
54595"\n"
54596"/* xtest returns non-zero if the instruction is executed within an RTM or active\n"
54597" * HLE region. */\n"
54598"/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is\n"
54599" * supported. */\n"
54600"static __inline__ int\n"
54601" __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n"
54602" _xtest(void) {\n"
54603" return __builtin_ia32_xtest();\n"
54604"}\n"
54605"\n"
54606"#endif\n"
54607"" } ,
54608
54609 {}
54610};
54611
54612
54613